Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
7af2ff16
Commit
7af2ff16
authored
Jun 11, 2021
by
Vincent Dumoulin
Committed by
A. Unique TensorFlower
Jun 11, 2021
Browse files
Internal change
PiperOrigin-RevId: 378869744
parent
a0be6885
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
199 additions
and
10 deletions
+199
-10
official/vision/beta/data/create_coco_tf_record.py
official/vision/beta/data/create_coco_tf_record.py
+27
-10
official/vision/beta/data/process_coco_few_shot.sh
official/vision/beta/data/process_coco_few_shot.sh
+48
-0
official/vision/beta/data/process_coco_few_shot_json_files.py
...cial/vision/beta/data/process_coco_few_shot_json_files.py
+124
-0
No files found.
official/vision/beta/data/create_coco_tf_record.py
View file @
7af2ff16
...
...
@@ -46,7 +46,7 @@ from official.vision.beta.data import tfrecord_lib
flags
.
DEFINE_boolean
(
'include_masks'
,
False
,
'Whether to include instance segmentations masks '
'(PNG encoded) in the result. default: False.'
)
flags
.
DEFINE_string
(
'image_dir'
,
''
,
'Directory containing images.'
)
flags
.
DEFINE_
multi_
string
(
'image_dir'
,
''
,
'Directory containing images.'
)
flags
.
DEFINE_string
(
'image_info_file'
,
''
,
'File containing image information. '
'Tf Examples in the output files correspond to the image '
...
...
@@ -159,7 +159,7 @@ def encode_caption_annotations(caption_annotations):
def
create_tf_example
(
image
,
image_dir
,
image_dir
s
,
bbox_annotations
=
None
,
id_to_name_map
=
None
,
caption_annotations
=
None
,
...
...
@@ -169,7 +169,7 @@ def create_tf_example(image,
Args:
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id']
image_dir: director
y
containing the image files.
image_dir
s
:
list of
director
ies
containing the image files.
bbox_annotations:
list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
...
...
@@ -190,14 +190,31 @@ def create_tf_example(image,
num_annotations_skipped: Number of (invalid) annotations that were ignored.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
ValueError: if the image pointed to by data['filename'] is not a valid JPEG,
does not exist, or is not unique across image directories.
"""
image_height
=
image
[
'height'
]
image_width
=
image
[
'width'
]
filename
=
image
[
'file_name'
]
image_id
=
image
[
'id'
]
if
len
(
image_dirs
)
>
1
:
full_paths
=
[
os
.
path
.
join
(
image_dir
,
filename
)
for
image_dir
in
image_dirs
]
full_existing_paths
=
[
p
for
p
in
full_paths
if
tf
.
io
.
gfile
.
exists
(
p
)]
if
not
full_existing_paths
:
raise
ValueError
(
'{} does not exist across image directories.'
.
format
(
filename
))
if
len
(
full_existing_paths
)
>
1
:
raise
ValueError
(
'{} is not unique across image directories'
.
format
(
filename
))
full_path
,
=
full_existing_paths
# If there is only one image directory, it's not worth checking for existence,
# since trying to open the file will raise an informative error message if it
# does not exist.
else
:
image_dir
,
=
image_dirs
full_path
=
os
.
path
.
join
(
image_dir
,
filename
)
with
tf
.
io
.
gfile
.
GFile
(
full_path
,
'rb'
)
as
fid
:
encoded_jpg
=
fid
.
read
()
...
...
@@ -276,7 +293,7 @@ def _load_images_info(images_info_file):
return
info_dict
[
'images'
]
def
generate_annotations
(
images
,
image_dir
,
def
generate_annotations
(
images
,
image_dir
s
,
img_to_obj_annotation
=
None
,
img_to_caption_annotation
=
None
,
id_to_name_map
=
None
,
include_masks
=
False
):
...
...
@@ -289,12 +306,12 @@ def generate_annotations(images, image_dir,
caption_annotaion
=
(
img_to_caption_annotation
.
get
(
image
[
'id'
],
None
)
if
img_to_caption_annotation
else
None
)
yield
(
image
,
image_dir
,
object_annotation
,
id_to_name_map
,
yield
(
image
,
image_dir
s
,
object_annotation
,
id_to_name_map
,
caption_annotaion
,
include_masks
)
def
_create_tf_record_from_coco_annotations
(
images_info_file
,
image_dir
,
image_dir
s
,
output_path
,
num_shards
,
object_annotations_file
=
None
,
...
...
@@ -309,7 +326,7 @@ def _create_tf_record_from_coco_annotations(images_info_file,
files Eg. 'image_info_test-dev2017.json',
'instance_annotations_train2017.json',
'caption_annotations_train2017.json', etc.
image_dir:
D
irector
y
containing the image files.
image_dir
s
:
List of d
irector
ies
containing the image files.
output_path: Path to output tf.Record file.
num_shards: Number of output files to create.
object_annotations_file: JSON file containing bounding box annotations.
...
...
@@ -333,7 +350,7 @@ def _create_tf_record_from_coco_annotations(images_info_file,
_load_caption_annotations
(
caption_annotations_file
))
coco_annotations_iter
=
generate_annotations
(
images
,
image_dir
,
img_to_obj_annotation
,
img_to_caption_annotation
,
images
,
image_dir
s
,
img_to_obj_annotation
,
img_to_caption_annotation
,
id_to_name_map
=
id_to_name_map
,
include_masks
=
include_masks
)
num_skipped
=
tfrecord_lib
.
write_tf_record_dataset
(
...
...
official/vision/beta/data/process_coco_few_shot.sh
0 → 100644
View file @
7af2ff16
#!/bin/bash
#
# Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.

# Scratch directory for the downloaded JSON files; deleted at the end.
tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
# Where the TFRecord shards are written; overridable with -o.
output_dir="/tmp/coco_few_shot"
while getopts "o:" o; do
  case "${o}" in
    o)
      output_dir=${OPTARG}
      ;;
    *)
      echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1;;
  esac
done

# Mirror the official few-shot split files. The -A accept-list restricts the
# recursive download to the 5k validation split plus the 10- and 30-shot
# per-category JSON files.
cocosplit_url="dl.yf.io/fs-det/datasets/cocosplit"
wget --recursive --no-parent -q --show-progress --progress=bar:force:noscroll \
    -P "${tmp_dir}" -A "5k.json,*10shot*.json,*30shot*.json" \
    "http://${cocosplit_url}/"
# `wget --recursive` recreates the URL's host/path structure under tmp_dir;
# flatten it so later steps can use simple "${tmp_dir}/..." paths.
mv "${tmp_dir}/${cocosplit_url}/"* "${tmp_dir}"
rm -rf "${tmp_dir}/${cocosplit_url}/"

# Fuse the per-category JSON files into one file per (shots, seed) pair
# (produces ${tmp_dir}/{10,30}shot_seed{0..9}.json).
python process_coco_few_shot_json_files.py \
    --logtostderr --workdir="${tmp_dir}"

# Convert each fused (shots, seed) JSON file into 4 TFRecord shards. Both
# train2014 and val2014 image directories are passed because the few-shot
# splits draw images from both.
for seed in {0..9}; do
  for shots in 10 30; do
    python create_coco_tf_record.py \
        --logtostderr \
        --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
        --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
        --image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
        --object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
        --caption_annotations_file="" \
        --output_file_prefix="${output_dir}/${shots}shot_seed${seed}" \
        --num_shards=4
  done
done

# Convert the 5k evaluation split (kept in its original datasplit/ location,
# untouched by the fusing step) into 10 TFRecord shards.
python create_coco_tf_record.py \
    --logtostderr \
    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
    --image_info_file="${tmp_dir}/datasplit/5k.json" \
    --object_annotations_file="${tmp_dir}/datasplit/5k.json" \
    --caption_annotations_file="" \
    --output_file_prefix="${output_dir}/5k" \
    --num_shards=10

# Clean up the scratch directory.
rm -rf "${tmp_dir}"
official/vision/beta/data/process_coco_few_shot_json_files.py
0 → 100644
View file @
7af2ff16
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Processes the JSON files for COCO few-shot.
We assume that `workdir` mirrors the contents of
http://dl.yf.io/fs-det/datasets/cocosplit/, which contains the official JSON
files for the few-shot COCO evaluation procedure that Wang et al. (2020)'s
"Frustratingly Simple Few-Shot Object Detection" paper uses.
"""
import
collections
import
itertools
import
json
import
logging
import
os
from
absl
import
app
from
absl
import
flags
import
tensorflow
as
tf
# Route progress messages from main() through TensorFlow's logger at INFO
# level so they are visible on stderr (with --logtostderr).
logger = tf.get_logger()
logger.setLevel(logging.INFO)

# Directory that mirrors http://dl.yf.io/fs-det/datasets/cocosplit/ and into
# which the fused per-(shots, seed) JSON files are written.
flags.DEFINE_string('workdir', None, 'Working directory.')

FLAGS = flags.FLAGS
# The 80 COCO object categories, in the order used to name the per-category
# few-shot JSON files.
CATEGORIES = [
    'airplane', 'apple', 'backpack', 'banana', 'baseball bat',
    'baseball glove', 'bear', 'bed', 'bench', 'bicycle', 'bird', 'boat',
    'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot',
    'cat', 'cell phone', 'chair', 'clock', 'couch', 'cow', 'cup',
    'dining table', 'dog', 'donut', 'elephant', 'fire hydrant', 'fork',
    'frisbee', 'giraffe', 'hair drier', 'handbag', 'horse', 'hot dog',
    'keyboard', 'kite', 'knife', 'laptop', 'microwave', 'motorcycle',
    'mouse', 'orange', 'oven', 'parking meter', 'person', 'pizza',
    'potted plant', 'refrigerator', 'remote', 'sandwich', 'scissors',
    'sheep', 'sink', 'skateboard', 'skis', 'snowboard', 'spoon',
    'sports ball', 'stop sign', 'suitcase', 'surfboard', 'teddy bear',
    'tennis racket', 'tie', 'toaster', 'toilet', 'toothbrush',
    'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase',
    'wine glass', 'zebra'
]
# Benchmark settings: ten random seeds, two shot counts.
SEEDS = list(range(10))
SHOTS = [10, 30]

# Maps each (seed, shots) pair to the list of JSON file suffixes (relative to
# the cocosplit root) holding that setting's per-category examples.
#
# http://dl.yf.io/fs-det/datasets/cocosplit/ is organized like so:
#
#   datasplit/
#     trainvalno5k.json
#     5k.json
#   full_box_{1,2,3,5,10,30}shot_{category}_trainval.json
#   seed{1-9}/
#     full_box_{1,2,3,5,10,30}shot_{category}_trainval.json
#
# i.e. the JSON files for seed 0 live in the root directory rather than in a
# `seed?/` subdirectory, hence the empty prefix for seed 0 below.
FILE_SUFFIXES = collections.defaultdict(list)
for _seed, _shots in itertools.product(SEEDS, SHOTS):
  _subdir = 'seed{}/'.format(_seed) if _seed else ''
  FILE_SUFFIXES[(_seed, _shots)].extend(
      '{}full_box_{}shot_{}_trainval.json'.format(_subdir, _shots, _category)
      for _category in CATEGORIES)
def main(unused_argv):
  """Fuses the per-category few-shot JSON files, one output per (shots, seed).

  For every (seed, shots) combination, reads the 80 per-category JSON files
  listed in FILE_SUFFIXES under --workdir, checks that they agree on their
  metadata, merges their images (deduplicated by id) and annotations, and
  writes the result to `{shots}shot_seed{seed}.json` in the same directory.

  Args:
    unused_argv: Unused positional command-line arguments.

  Raises:
    RuntimeError: If the JSON files for a given (seed, shots) combination
      disagree on their 'info', 'licenses', or 'categories' fields.
  """
  workdir = FLAGS.workdir
  for seed, shots in itertools.product(SEEDS, SHOTS):
    # Retrieve all examples for a given seed and shots setting.
    file_paths = [os.path.join(workdir, suffix)
                  for suffix in FILE_SUFFIXES[(seed, shots)]]
    json_dicts = []
    for file_path in file_paths:
      with tf.io.gfile.GFile(file_path, 'r') as f:
        json_dicts.append(json.load(f))

    # Make sure that all JSON files for a given seed and shots setting have the
    # same metadata. We count on this to fuse them later on.
    metadata_dicts = [{'info': d['info'], 'licenses': d['licenses'],
                       'categories': d['categories']} for d in json_dicts]
    if any(d != metadata_dicts[0] for d in metadata_dicts[1:]):
      raise RuntimeError(
          'JSON files for {} shots (seed {}) '.format(shots, seed) +
          'have different info, licenses, or categories fields')

    # Retrieve images across all JSON files. chain.from_iterable flattens in
    # O(n), unlike sum(..., []) which re-copies the accumulator per file.
    images = list(
        itertools.chain.from_iterable(d['images'] for d in json_dicts))
    # Remove duplicate image entries: keying by image id keeps one entry per
    # id (the last occurrence wins; duplicates are presumed identical since
    # they describe the same COCO image -- confirm against the source files).
    images = list({image['id']: image for image in images}.values())
    output_dict = {
        'info': json_dicts[0]['info'],
        'licenses': json_dicts[0]['licenses'],
        'categories': json_dicts[0]['categories'],
        'images': images,
        'annotations': list(itertools.chain.from_iterable(
            d['annotations'] for d in json_dicts)),
    }

    output_path = os.path.join(workdir,
                               '{}shot_seed{}.json'.format(shots, seed))
    with tf.io.gfile.GFile(output_path, 'w') as f:
      json.dump(output_dict, f)
    logger.info('Processed %d shots (seed %d) and saved to %s', shots, seed,
                output_path)
if __name__ == '__main__':
  # --workdir has no usable default; fail fast if the caller omits it.
  flags.mark_flag_as_required('workdir')
  app.run(main)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment