Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
09d9656f
Unverified
Commit
09d9656f
authored
Jan 13, 2022
by
Srihari Humbarwadi
Committed by
GitHub
Jan 13, 2022
Browse files
Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling
parents
ac671306
49a5706c
Changes
427
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
596 additions
and
312 deletions
+596
-312
official/vision/beta/configs/semantic_segmentation.py
official/vision/beta/configs/semantic_segmentation.py
+11
-0
official/vision/beta/data/process_coco_panoptic.sh
official/vision/beta/data/process_coco_panoptic.sh
+40
-0
official/vision/beta/dataloaders/parser.py
official/vision/beta/dataloaders/parser.py
+1
-1
official/vision/beta/dataloaders/retinanet_input.py
official/vision/beta/dataloaders/retinanet_input.py
+24
-2
official/vision/beta/dataloaders/tf_example_decoder.py
official/vision/beta/dataloaders/tf_example_decoder.py
+2
-1
official/vision/beta/dataloaders/tf_example_decoder_test.py
official/vision/beta/dataloaders/tf_example_decoder_test.py
+65
-152
official/vision/beta/dataloaders/tf_example_label_map_decoder_test.py
...ion/beta/dataloaders/tf_example_label_map_decoder_test.py
+38
-118
official/vision/beta/dataloaders/tfds_factory_test.py
official/vision/beta/dataloaders/tfds_factory_test.py
+36
-0
official/vision/beta/dataloaders/tfexample_utils.py
official/vision/beta/dataloaders/tfexample_utils.py
+110
-7
official/vision/beta/dataloaders/utils.py
official/vision/beta/dataloaders/utils.py
+1
-1
official/vision/beta/dataloaders/video_input.py
official/vision/beta/dataloaders/video_input.py
+1
-1
official/vision/beta/evaluation/coco_utils.py
official/vision/beta/evaluation/coco_utils.py
+25
-10
official/vision/beta/evaluation/coco_utils_test.py
official/vision/beta/evaluation/coco_utils_test.py
+49
-0
official/vision/beta/evaluation/iou_test.py
official/vision/beta/evaluation/iou_test.py
+16
-0
official/vision/beta/evaluation/panoptic_quality_evaluator_test.py
...vision/beta/evaluation/panoptic_quality_evaluator_test.py
+12
-6
official/vision/beta/evaluation/segmentation_metrics.py
official/vision/beta/evaluation/segmentation_metrics.py
+8
-13
official/vision/beta/evaluation/segmentation_metrics_test.py
official/vision/beta/evaluation/segmentation_metrics_test.py
+77
-0
official/vision/beta/losses/segmentation_losses.py
official/vision/beta/losses/segmentation_losses.py
+45
-0
official/vision/beta/modeling/backbones/__init__.py
official/vision/beta/modeling/backbones/__init__.py
+1
-0
official/vision/beta/modeling/backbones/factory_test.py
official/vision/beta/modeling/backbones/factory_test.py
+34
-0
No files found.
official/vision/beta/configs/semantic_segmentation.py
View file @
09d9656f
...
...
@@ -76,6 +76,16 @@ class SegmentationHead(hyperparams.Config):
decoder_max_level
:
Optional
[
Union
[
int
,
str
]]
=
None
@
dataclasses
.
dataclass
class
MaskScoringHead
(
hyperparams
.
Config
):
"""Mask Scoring head config."""
num_convs
:
int
=
4
num_filters
:
int
=
128
fc_input_size
:
List
[
int
]
=
dataclasses
.
field
(
default_factory
=
list
)
num_fcs
:
int
=
2
fc_dims
:
int
=
1024
@
dataclasses
.
dataclass
class
SemanticSegmentationModel
(
hyperparams
.
Config
):
"""Semantic segmentation model config."""
...
...
@@ -87,6 +97,7 @@ class SemanticSegmentationModel(hyperparams.Config):
backbone
:
backbones
.
Backbone
=
backbones
.
Backbone
(
type
=
'resnet'
,
resnet
=
backbones
.
ResNet
())
decoder
:
decoders
.
Decoder
=
decoders
.
Decoder
(
type
=
'identity'
)
mask_scoring_head
:
Optional
[
MaskScoringHead
]
=
None
norm_activation
:
common
.
NormActivation
=
common
.
NormActivation
()
...
...
official/vision/beta/data/process_coco_panoptic.sh
0 → 100644
View file @
09d9656f
#!/bin/bash
sudo
apt update
sudo
apt
install
unzip aria2
-y
DATA_DIR
=
$1
aria2c
-j
8
-Z
\
http://images.cocodataset.org/annotations/annotations_trainval2017.zip
\
http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip
\
http://images.cocodataset.org/zips/train2017.zip
\
http://images.cocodataset.org/zips/val2017.zip
\
--dir
=
$DATA_DIR
;
unzip
$DATA_DIR
/
"*"
.zip
-d
$DATA_DIR
;
mkdir
$DATA_DIR
/zips
&&
mv
$DATA_DIR
/
*
.zip
$DATA_DIR
/zips
;
unzip
$DATA_DIR
/annotations/panoptic_train2017.zip
-d
$DATA_DIR
unzip
$DATA_DIR
/annotations/panoptic_val2017.zip
-d
$DATA_DIR
python3 official/vision/beta/data/create_coco_tf_record.py
\
--logtostderr
\
--image_dir
=
"
$DATA_DIR
/val2017"
\
--object_annotations_file
=
"
$DATA_DIR
/annotations/instances_val2017.json"
\
--output_file_prefix
=
"
$DATA_DIR
/tfrecords/val"
\
--panoptic_annotations_file
=
"
$DATA_DIR
/annotations/panoptic_val2017.json"
\
--panoptic_masks_dir
=
"
$DATA_DIR
/panoptic_val2017"
\
--num_shards
=
8
\
--include_masks
\
--include_panoptic_masks
python3 official/vision/beta/data/create_coco_tf_record.py
\
--logtostderr
\
--image_dir
=
"
$DATA_DIR
/train2017"
\
--object_annotations_file
=
"
$DATA_DIR
/annotations/instances_train2017.json"
\
--output_file_prefix
=
"
$DATA_DIR
/tfrecords/train"
\
--panoptic_annotations_file
=
"
$DATA_DIR
/annotations/panoptic_train2017.json"
\
--panoptic_masks_dir
=
"
$DATA_DIR
/panoptic_train2017"
\
--num_shards
=
32
\
--include_masks
\
--include_panoptic_masks
official/vision/beta/dataloaders/parser.py
View file @
09d9656f
...
...
@@ -55,7 +55,7 @@ class Parser(object):
is_training: a `bool` to indicate whether it is in training mode.
Returns:
parse: a `callable` that takes the serialized examle and generate the
parse: a `callable` that takes the serialized exam
p
le and generate the
images, labels tuple where labels is a dict of Tensors that contains
labels.
"""
...
...
official/vision/beta/dataloaders/retinanet_input.py
View file @
09d9656f
...
...
@@ -19,11 +19,13 @@ into (image, labels) tuple for RetinaNet.
"""
# Import libraries
from
absl
import
logging
import
tensorflow
as
tf
from
official.vision.beta.dataloaders
import
parser
from
official.vision.beta.dataloaders
import
utils
from
official.vision.beta.ops
import
anchor
from
official.vision.beta.ops
import
augment
from
official.vision.beta.ops
import
box_ops
from
official.vision.beta.ops
import
preprocess_ops
...
...
@@ -40,6 +42,7 @@ class Parser(parser.Parser):
anchor_size
,
match_threshold
=
0.5
,
unmatched_threshold
=
0.5
,
aug_type
=
None
,
aug_rand_hflip
=
False
,
aug_scale_min
=
1.0
,
aug_scale_max
=
1.0
,
...
...
@@ -71,6 +74,8 @@ class Parser(parser.Parser):
unmatched_threshold: `float` number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
aug_type: An optional Augmentation object to choose from AutoAugment and
RandAugment. The latter is not supported, and will raise ValueError.
aug_rand_hflip: `bool`, if True, augment training with random horizontal
flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
...
...
@@ -108,7 +113,20 @@ class Parser(parser.Parser):
self
.
_aug_scale_min
=
aug_scale_min
self
.
_aug_scale_max
=
aug_scale_max
# Data Augmentation with AutoAugment.
# Data augmentation with AutoAugment or RandAugment.
self
.
_augmenter
=
None
if
aug_type
is
not
None
:
if
aug_type
.
type
==
'autoaug'
:
logging
.
info
(
'Using AutoAugment.'
)
self
.
_augmenter
=
augment
.
AutoAugment
(
augmentation_name
=
aug_type
.
autoaug
.
augmentation_name
,
cutout_const
=
aug_type
.
autoaug
.
cutout_const
,
translate_const
=
aug_type
.
autoaug
.
translate_const
)
else
:
# TODO(b/205346436) Support RandAugment.
raise
ValueError
(
f
'Augmentation policy
{
aug_type
.
type
}
not supported.'
)
# Deprecated. Data Augmentation with AutoAugment.
self
.
_use_autoaugment
=
use_autoaugment
self
.
_autoaugment_policy_name
=
autoaugment_policy_name
...
...
@@ -138,9 +156,13 @@ class Parser(parser.Parser):
for
k
,
v
in
attributes
.
items
():
attributes
[
k
]
=
tf
.
gather
(
v
,
indices
)
# Gets original image
and its size
.
# Gets original image.
image
=
data
[
'image'
]
# Apply autoaug or randaug.
if
self
.
_augmenter
is
not
None
:
image
,
boxes
=
self
.
_augmenter
.
distort_with_boxes
(
image
,
boxes
)
image_shape
=
tf
.
shape
(
input
=
image
)[
0
:
2
]
# Normalizes image with mean and std pixel values.
...
...
official/vision/beta/dataloaders/tf_example_decoder.py
View file @
09d9656f
...
...
@@ -23,8 +23,9 @@ from official.vision.beta.dataloaders import decoder
def
_generate_source_id
(
image_bytes
):
# Hashing using 22 bits since float32 has only 23 mantissa bits.
return
tf
.
strings
.
as_string
(
tf
.
strings
.
to_hash_bucket_fast
(
image_bytes
,
2
**
63
-
1
))
tf
.
strings
.
to_hash_bucket_fast
(
image_bytes
,
2
**
22
-
1
))
class
TfExampleDecoder
(
decoder
.
Decoder
):
...
...
official/vision/beta/dataloaders/tf_example_decoder_test.py
View file @
09d9656f
...
...
@@ -14,24 +14,13 @@
"""Tests for tf_example_decoder.py."""
import
io
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
from
PIL
import
Image
import
tensorflow
as
tf
from
official.vision.beta.dataloaders
import
tf_example_decoder
DUMP_SOURCE_ID
=
b
'123'
def
_encode_image
(
image_array
,
fmt
):
image
=
Image
.
fromarray
(
image_array
)
with
io
.
BytesIO
()
as
output
:
image
.
save
(
output
,
format
=
fmt
)
return
output
.
getvalue
()
from
official.vision.beta.dataloaders
import
tfexample_utils
class
TfExampleDecoderTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
...
...
@@ -52,73 +41,11 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
decoder
=
tf_example_decoder
.
TfExampleDecoder
(
include_mask
=
True
,
regenerate_source_id
=
regenerate_source_id
)
image
=
_encode_image
(
np
.
uint8
(
np
.
random
.
rand
(
image_height
,
image_width
,
3
)
*
255
),
fmt
=
'JPEG'
)
if
num_instances
==
0
:
xmins
=
[]
xmaxs
=
[]
ymins
=
[]
ymaxs
=
[]
labels
=
[]
areas
=
[]
is_crowds
=
[]
masks
=
[]
else
:
xmins
=
list
(
np
.
random
.
rand
(
num_instances
))
xmaxs
=
list
(
np
.
random
.
rand
(
num_instances
))
ymins
=
list
(
np
.
random
.
rand
(
num_instances
))
ymaxs
=
list
(
np
.
random
.
rand
(
num_instances
))
labels
=
list
(
np
.
random
.
randint
(
100
,
size
=
num_instances
))
areas
=
[(
xmax
-
xmin
)
*
(
ymax
-
ymin
)
*
image_height
*
image_width
for
xmin
,
xmax
,
ymin
,
ymax
in
zip
(
xmins
,
xmaxs
,
ymins
,
ymaxs
)]
is_crowds
=
[
0
]
*
num_instances
masks
=
[]
for
_
in
range
(
num_instances
):
mask
=
_encode_image
(
np
.
uint8
(
np
.
random
.
rand
(
image_height
,
image_width
)
*
255
),
fmt
=
'PNG'
)
masks
.
append
(
mask
)
serialized_example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
image
]))),
'image/source_id'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
DUMP_SOURCE_ID
]))),
'image/height'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_height
]))),
'image/width'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_width
]))),
'image/object/bbox/xmin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmins
))),
'image/object/bbox/xmax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmaxs
))),
'image/object/bbox/ymin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymins
))),
'image/object/bbox/ymax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymaxs
))),
'image/object/class/label'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
labels
))),
'image/object/is_crowd'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
is_crowds
))),
'image/object/area'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
areas
))),
'image/object/mask'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
masks
))),
})).
SerializeToString
()
serialized_example
=
tfexample_utils
.
create_detection_test_example
(
image_height
=
image_height
,
image_width
=
image_width
,
image_channel
=
3
,
num_instances
=
num_instances
).
SerializeToString
()
decoded_tensors
=
decoder
.
decode
(
tf
.
convert_to_tensor
(
value
=
serialized_example
))
...
...
@@ -127,7 +54,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self
.
assertAllEqual
(
(
image_height
,
image_width
,
3
),
results
[
'image'
].
shape
)
if
not
regenerate_source_id
:
self
.
assertEqual
(
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
tfexample_utils
.
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
image_height
,
results
[
'height'
])
self
.
assertEqual
(
image_width
,
results
[
'width'
])
self
.
assertAllEqual
(
...
...
@@ -151,7 +78,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[[
0
,
0
,
0
],
[
255
,
255
,
255
],
[
255
,
255
,
255
],
[
0
,
0
,
0
]],
[[
0
,
0
,
0
],
[
255
,
255
,
255
],
[
255
,
255
,
255
],
[
0
,
0
,
0
]],
[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
]]]
image
=
_
encode_image
(
np
.
uint8
(
image_content
),
fmt
=
'PNG'
)
image
=
tfexample_utils
.
encode_image
(
np
.
uint8
(
image_content
),
fmt
=
'PNG'
)
image_height
=
4
image_width
=
4
num_instances
=
2
...
...
@@ -172,45 +99,37 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[
0
,
255
,
255
,
255
],
[
0
,
255
,
255
,
255
],
[
0
,
255
,
255
,
255
]]]
masks
=
[
_encode_image
(
np
.
uint8
(
m
),
fmt
=
'PNG'
)
for
m
in
list
(
mask_content
)]
masks
=
[
tfexample_utils
.
encode_image
(
np
.
uint8
(
m
),
fmt
=
'PNG'
)
for
m
in
list
(
mask_content
)
]
serialized_example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
(
tf
.
train
.
Feature
(
'image/encoded'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
image
]))),
'image/source_id'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
DUMP_SOURCE_ID
]))),
'image/height'
:
(
tf
.
train
.
Feature
(
'image/source_id'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
tfexample_utils
.
DUMP_SOURCE_ID
]))),
'image/height'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_height
]))),
'image/width'
:
(
tf
.
train
.
Feature
(
'image/width'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_width
]))),
'image/object/bbox/xmin'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/xmin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmins
))),
'image/object/bbox/xmax'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/xmax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmaxs
))),
'image/object/bbox/ymin'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/ymin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymins
))),
'image/object/bbox/ymax'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/ymax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymaxs
))),
'image/object/class/label'
:
(
tf
.
train
.
Feature
(
'image/object/class/label'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
labels
))),
'image/object/is_crowd'
:
(
tf
.
train
.
Feature
(
'image/object/is_crowd'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
is_crowds
))),
'image/object/area'
:
(
tf
.
train
.
Feature
(
'image/object/area'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
areas
))),
'image/object/mask'
:
(
tf
.
train
.
Feature
(
'image/object/mask'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
masks
))),
})).
SerializeToString
()
decoded_tensors
=
decoder
.
decode
(
...
...
@@ -221,7 +140,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self
.
assertAllEqual
(
(
image_height
,
image_width
,
3
),
results
[
'image'
].
shape
)
self
.
assertAllEqual
(
image_content
,
results
[
'image'
])
self
.
assertEqual
(
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
tfexample_utils
.
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
image_height
,
results
[
'height'
])
self
.
assertEqual
(
image_width
,
results
[
'width'
])
self
.
assertAllEqual
(
...
...
@@ -259,7 +178,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[[
0
,
0
,
0
],
[
255
,
255
,
255
],
[
255
,
255
,
255
],
[
0
,
0
,
0
]],
[[
0
,
0
,
0
],
[
255
,
255
,
255
],
[
255
,
255
,
255
],
[
0
,
0
,
0
]],
[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
]]]
image
=
_
encode_image
(
np
.
uint8
(
image_content
),
fmt
=
'PNG'
)
image
=
tfexample_utils
.
encode_image
(
np
.
uint8
(
image_content
),
fmt
=
'PNG'
)
image_height
=
4
image_width
=
4
num_instances
=
2
...
...
@@ -276,39 +195,33 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[
0
,
255
,
255
,
255
],
[
0
,
255
,
255
,
255
],
[
0
,
255
,
255
,
255
]]]
masks
=
[
_encode_image
(
np
.
uint8
(
m
),
fmt
=
'PNG'
)
for
m
in
list
(
mask_content
)]
masks
=
[
tfexample_utils
.
encode_image
(
np
.
uint8
(
m
),
fmt
=
'PNG'
)
for
m
in
list
(
mask_content
)
]
serialized_example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
(
tf
.
train
.
Feature
(
'image/encoded'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
image
]))),
'image/source_id'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
DUMP_SOURCE_ID
]))),
'image/height'
:
(
tf
.
train
.
Feature
(
'image/source_id'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
tfexample_utils
.
DUMP_SOURCE_ID
]))),
'image/height'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_height
]))),
'image/width'
:
(
tf
.
train
.
Feature
(
'image/width'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_width
]))),
'image/object/bbox/xmin'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/xmin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmins
))),
'image/object/bbox/xmax'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/xmax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmaxs
))),
'image/object/bbox/ymin'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/ymin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymins
))),
'image/object/bbox/ymax'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/ymax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymaxs
))),
'image/object/class/label'
:
(
tf
.
train
.
Feature
(
'image/object/class/label'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
labels
))),
'image/object/mask'
:
(
tf
.
train
.
Feature
(
'image/object/mask'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
masks
))),
})).
SerializeToString
()
decoded_tensors
=
decoder
.
decode
(
...
...
@@ -318,7 +231,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self
.
assertAllEqual
(
(
image_height
,
image_width
,
3
),
results
[
'image'
].
shape
)
self
.
assertAllEqual
(
image_content
,
results
[
'image'
])
self
.
assertEqual
(
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
tfexample_utils
.
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
image_height
,
results
[
'height'
])
self
.
assertEqual
(
image_width
,
results
[
'width'
])
self
.
assertAllEqual
(
...
...
official/vision/beta/dataloaders/tf_example_label_map_decoder_test.py
View file @
09d9656f
...
...
@@ -14,28 +14,19 @@
"""Tests for tf_example_label_map_decoder.py."""
import
io
import
os
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
from
PIL
import
Image
import
tensorflow
as
tf
from
official.vision.beta.dataloaders
import
tf_example_label_map_decoder
from
official.vision.beta.dataloaders
import
tfexample_utils
DUMP_SOURCE_ID
=
b
'123'
LABEL_MAP_CSV_CONTENT
=
'0,class_0
\n
1,class_1
\n
2,class_2'
def
_encode_image
(
image_array
,
fmt
):
image
=
Image
.
fromarray
(
image_array
)
with
io
.
BytesIO
()
as
output
:
image
.
save
(
output
,
format
=
fmt
)
return
output
.
getvalue
()
class
TfExampleDecoderLabelMapTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
@
parameterized
.
parameters
(
...
...
@@ -56,74 +47,11 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
decoder
=
tf_example_label_map_decoder
.
TfExampleDecoderLabelMap
(
label_map_path
,
include_mask
=
True
)
image
=
_encode_image
(
np
.
uint8
(
np
.
random
.
rand
(
image_height
,
image_width
,
3
)
*
255
),
fmt
=
'JPEG'
)
if
num_instances
==
0
:
xmins
=
[]
xmaxs
=
[]
ymins
=
[]
ymaxs
=
[]
labels
=
[]
areas
=
[]
is_crowds
=
[]
masks
=
[]
else
:
xmins
=
list
(
np
.
random
.
rand
(
num_instances
))
xmaxs
=
list
(
np
.
random
.
rand
(
num_instances
))
ymins
=
list
(
np
.
random
.
rand
(
num_instances
))
ymaxs
=
list
(
np
.
random
.
rand
(
num_instances
))
labels
=
list
(
np
.
random
.
randint
(
100
,
size
=
num_instances
))
areas
=
[(
xmax
-
xmin
)
*
(
ymax
-
ymin
)
*
image_height
*
image_width
for
xmin
,
xmax
,
ymin
,
ymax
in
zip
(
xmins
,
xmaxs
,
ymins
,
ymaxs
)]
is_crowds
=
[
0
]
*
num_instances
masks
=
[]
labels
=
[
b
'class_1'
]
*
num_instances
for
_
in
range
(
num_instances
):
mask
=
_encode_image
(
np
.
uint8
(
np
.
random
.
rand
(
image_height
,
image_width
)
*
255
),
fmt
=
'PNG'
)
masks
.
append
(
mask
)
serialized_example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
image
]))),
'image/source_id'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
DUMP_SOURCE_ID
]))),
'image/height'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_height
]))),
'image/width'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_width
]))),
'image/object/bbox/xmin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmins
))),
'image/object/bbox/xmax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmaxs
))),
'image/object/bbox/ymin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymins
))),
'image/object/bbox/ymax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymaxs
))),
'image/object/class/text'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
labels
))),
'image/object/is_crowd'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
is_crowds
))),
'image/object/area'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
areas
))),
'image/object/mask'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
masks
))),
})).
SerializeToString
()
serialized_example
=
tfexample_utils
.
create_detection_test_example
(
image_height
=
image_height
,
image_width
=
image_width
,
image_channel
=
3
,
num_instances
=
num_instances
).
SerializeToString
()
decoded_tensors
=
decoder
.
decode
(
tf
.
convert_to_tensor
(
value
=
serialized_example
))
...
...
@@ -131,7 +59,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
self
.
assertAllEqual
(
(
image_height
,
image_width
,
3
),
results
[
'image'
].
shape
)
self
.
assertEqual
(
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
tfexample_utils
.
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
image_height
,
results
[
'height'
])
self
.
assertEqual
(
image_width
,
results
[
'width'
])
self
.
assertAllEqual
(
...
...
@@ -162,7 +90,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
[[
0
,
0
,
0
],
[
255
,
255
,
255
],
[
255
,
255
,
255
],
[
0
,
0
,
0
]],
[[
0
,
0
,
0
],
[
255
,
255
,
255
],
[
255
,
255
,
255
],
[
0
,
0
,
0
]],
[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
]]]
image
=
_
encode_image
(
np
.
uint8
(
image_content
),
fmt
=
'PNG'
)
image
=
tfexample_utils
.
encode_image
(
np
.
uint8
(
image_content
),
fmt
=
'PNG'
)
image_height
=
4
image_width
=
4
num_instances
=
2
...
...
@@ -183,45 +111,37 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
[
0
,
255
,
255
,
255
],
[
0
,
255
,
255
,
255
],
[
0
,
255
,
255
,
255
]]]
masks
=
[
_encode_image
(
np
.
uint8
(
m
),
fmt
=
'PNG'
)
for
m
in
list
(
mask_content
)]
masks
=
[
tfexample_utils
.
encode_image
(
np
.
uint8
(
m
),
fmt
=
'PNG'
)
for
m
in
list
(
mask_content
)
]
serialized_example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
(
tf
.
train
.
Feature
(
'image/encoded'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
image
]))),
'image/source_id'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
DUMP_SOURCE_ID
]))),
'image/height'
:
(
tf
.
train
.
Feature
(
'image/source_id'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
tfexample_utils
.
DUMP_SOURCE_ID
]))),
'image/height'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_height
]))),
'image/width'
:
(
tf
.
train
.
Feature
(
'image/width'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
[
image_width
]))),
'image/object/bbox/xmin'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/xmin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmins
))),
'image/object/bbox/xmax'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/xmax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
xmaxs
))),
'image/object/bbox/ymin'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/ymin'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymins
))),
'image/object/bbox/ymax'
:
(
tf
.
train
.
Feature
(
'image/object/bbox/ymax'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
ymaxs
))),
'image/object/class/text'
:
(
tf
.
train
.
Feature
(
'image/object/class/text'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
labels
))),
'image/object/is_crowd'
:
(
tf
.
train
.
Feature
(
'image/object/is_crowd'
:
(
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
is_crowds
))),
'image/object/area'
:
(
tf
.
train
.
Feature
(
'image/object/area'
:
(
tf
.
train
.
Feature
(
float_list
=
tf
.
train
.
FloatList
(
value
=
areas
))),
'image/object/mask'
:
(
tf
.
train
.
Feature
(
'image/object/mask'
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
masks
))),
})).
SerializeToString
()
decoded_tensors
=
decoder
.
decode
(
...
...
@@ -232,7 +152,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
self
.
assertAllEqual
(
(
image_height
,
image_width
,
3
),
results
[
'image'
].
shape
)
self
.
assertAllEqual
(
image_content
,
results
[
'image'
])
self
.
assertEqual
(
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
tfexample_utils
.
DUMP_SOURCE_ID
,
results
[
'source_id'
])
self
.
assertEqual
(
image_height
,
results
[
'height'
])
self
.
assertEqual
(
image_width
,
results
[
'width'
])
self
.
assertAllEqual
(
...
...
official/vision/beta/dataloaders/tfds_factory_test.py
View file @
09d9656f
...
...
@@ -23,6 +23,22 @@ from official.vision.beta.dataloaders import tfds_factory
class
TFDSFactoryTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
def
_create_test_example
(
self
):
serialized_example
=
{
'image'
:
tf
.
ones
(
shape
=
(
100
,
100
,
3
),
dtype
=
tf
.
uint8
),
'label'
:
1
,
'image/id'
:
0
,
'objects'
:
{
'label'
:
1
,
'is_crowd'
:
0
,
'area'
:
0.5
,
'bbox'
:
[
0.1
,
0.2
,
0.3
,
0.4
]
},
'segmentation_label'
:
tf
.
ones
((
100
,
100
,
1
),
dtype
=
tf
.
uint8
),
'image_left'
:
tf
.
ones
(
shape
=
(
100
,
100
,
3
),
dtype
=
tf
.
uint8
)
}
return
serialized_example
@
parameterized
.
parameters
(
(
'imagenet2012'
),
(
'cifar10'
),
...
...
@@ -31,6 +47,10 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def
test_classification_decoder
(
self
,
tfds_name
):
decoder
=
tfds_factory
.
get_classification_decoder
(
tfds_name
)
self
.
assertIsInstance
(
decoder
,
base_decoder
.
Decoder
)
decoded_tensor
=
decoder
.
decode
(
self
.
_create_test_example
())
self
.
assertLen
(
decoded_tensor
,
2
)
self
.
assertIn
(
'image/encoded'
,
decoded_tensor
)
self
.
assertIn
(
'image/class/label'
,
decoded_tensor
)
@
parameterized
.
parameters
(
(
'flowers'
),
...
...
@@ -48,6 +68,16 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def
test_detection_decoder
(
self
,
tfds_name
):
decoder
=
tfds_factory
.
get_detection_decoder
(
tfds_name
)
self
.
assertIsInstance
(
decoder
,
base_decoder
.
Decoder
)
decoded_tensor
=
decoder
.
decode
(
self
.
_create_test_example
())
self
.
assertLen
(
decoded_tensor
,
8
)
self
.
assertIn
(
'image'
,
decoded_tensor
)
self
.
assertIn
(
'source_id'
,
decoded_tensor
)
self
.
assertIn
(
'height'
,
decoded_tensor
)
self
.
assertIn
(
'width'
,
decoded_tensor
)
self
.
assertIn
(
'groundtruth_classes'
,
decoded_tensor
)
self
.
assertIn
(
'groundtruth_is_crowd'
,
decoded_tensor
)
self
.
assertIn
(
'groundtruth_area'
,
decoded_tensor
)
self
.
assertIn
(
'groundtruth_boxes'
,
decoded_tensor
)
@
parameterized
.
parameters
(
(
'pascal'
),
...
...
@@ -65,6 +95,12 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def
test_segmentation_decoder
(
self
,
tfds_name
):
decoder
=
tfds_factory
.
get_segmentation_decoder
(
tfds_name
)
self
.
assertIsInstance
(
decoder
,
base_decoder
.
Decoder
)
decoded_tensor
=
decoder
.
decode
(
self
.
_create_test_example
())
self
.
assertLen
(
decoded_tensor
,
4
)
self
.
assertIn
(
'image/encoded'
,
decoded_tensor
)
self
.
assertIn
(
'image/segmentation/class/encoded'
,
decoded_tensor
)
self
.
assertIn
(
'image/height'
,
decoded_tensor
)
self
.
assertIn
(
'image/width'
,
decoded_tensor
)
@
parameterized
.
parameters
(
(
'coco'
),
...
...
official/vision/beta/dataloaders/tfexample_utils.py
View file @
09d9656f
...
...
@@ -54,16 +54,20 @@ IMAGE_KEY = 'image/encoded'
CLASSIFICATION_LABEL_KEY
=
'image/class/label'
LABEL_KEY
=
'clip/label/index'
AUDIO_KEY
=
'features/audio'
DUMP_SOURCE_ID
=
b
'123'
def
make_image_bytes
(
shape
:
Sequence
[
int
]):
"""Generates image and return bytes in JPEG format."""
def
encode_image
(
image_array
:
np
.
array
,
fmt
:
str
)
->
bytes
:
image
=
Image
.
fromarray
(
image_array
)
with
io
.
BytesIO
()
as
output
:
image
.
save
(
output
,
format
=
fmt
)
return
output
.
getvalue
()
def
make_image_bytes
(
shape
:
Sequence
[
int
],
fmt
:
str
=
'JPEG'
)
->
bytes
:
"""Generates image and return bytes in specified format."""
random_image
=
np
.
random
.
randint
(
0
,
256
,
size
=
shape
,
dtype
=
np
.
uint8
)
random_image
=
Image
.
fromarray
(
random_image
)
with
io
.
BytesIO
()
as
buffer
:
random_image
.
save
(
buffer
,
format
=
'JPEG'
)
raw_image_bytes
=
buffer
.
getvalue
()
return
raw_image_bytes
return
encode_image
(
random_image
,
fmt
=
fmt
)
def
put_int64_to_context
(
seq_example
:
tf
.
train
.
SequenceExample
,
...
...
@@ -164,3 +168,102 @@ def create_3d_image_test_example(image_height: int, image_width: int,
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
labels
.
tobytes
()])))
}
return
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
feature
))
def create_detection_test_example(image_height: int, image_width: int,
                                  image_channel: int,
                                  num_instances: int) -> tf.train.Example:
  """Creates and returns a test example containing box and mask annotations.

  Args:
    image_height: The height of test image.
    image_width: The width of test image.
    image_channel: The channel of test image.
    num_instances: The number of object instances per image.

  Returns:
    A tf.train.Example for testing.
  """

  def _bytes_feature(values):
    # Wraps a list of bytes values into a tf.train.Feature.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

  def _int64_feature(values):
    # Wraps a list of integer values into a tf.train.Feature.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

  def _float_feature(values):
    # Wraps a list of float values into a tf.train.Feature.
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))

  image = make_image_bytes([image_height, image_width, image_channel])
  if num_instances == 0:
    # Degenerate example with no annotated objects.
    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    labels, labels_text, areas, is_crowds, masks = [], [], [], [], []
  else:
    xmins = list(np.random.rand(num_instances))
    xmaxs = list(np.random.rand(num_instances))
    ymins = list(np.random.rand(num_instances))
    ymaxs = list(np.random.rand(num_instances))
    labels_text = [b'class_1'] * num_instances
    labels = list(np.random.randint(100, size=num_instances))
    # Boxes are in normalized coordinates, so the area is scaled back to
    # pixels.
    areas = [
        (xmax - xmin) * (ymax - ymin) * image_height * image_width
        for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)
    ]
    is_crowds = [0] * num_instances
    # One PNG-encoded, single-channel mask per instance.
    masks = [
        make_image_bytes([image_height, image_width], fmt='PNG')
        for _ in range(num_instances)
    ]
  return tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded': _bytes_feature([image]),
              'image/source_id': _bytes_feature([DUMP_SOURCE_ID]),
              'image/height': _int64_feature([image_height]),
              'image/width': _int64_feature([image_width]),
              'image/object/bbox/xmin': _float_feature(xmins),
              'image/object/bbox/xmax': _float_feature(xmaxs),
              'image/object/bbox/ymin': _float_feature(ymins),
              'image/object/bbox/ymax': _float_feature(ymaxs),
              'image/object/class/label': _int64_feature(labels),
              'image/object/class/text': _bytes_feature(labels_text),
              'image/object/is_crowd': _int64_feature(is_crowds),
              'image/object/area': _float_feature(areas),
              'image/object/mask': _bytes_feature(masks),
          }))
def create_segmentation_test_example(image_height: int, image_width: int,
                                     image_channel: int) -> tf.train.Example:
  """Creates and returns a test example containing mask annotations.

  Args:
    image_height: The height of test image.
    image_width: The width of test image.
    image_channel: The channel of test image.

  Returns:
    A tf.train.Example for testing.
  """
  image = make_image_bytes([image_height, image_width, image_channel])
  # Single-channel PNG segmentation mask with the same spatial size.
  mask = make_image_bytes([image_height, image_width], fmt='PNG')
  feature = {
      'image/encoded':
          tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])),
      'image/segmentation/class/encoded':
          tf.train.Feature(bytes_list=tf.train.BytesList(value=[mask])),
      'image/height':
          tf.train.Feature(
              int64_list=tf.train.Int64List(value=[image_height])),
      'image/width':
          tf.train.Feature(int64_list=tf.train.Int64List(value=[image_width]))
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
official/vision/beta/dataloaders/utils.py
View file @
09d9656f
...
...
@@ -31,7 +31,7 @@ def process_source_id(source_id: tf.Tensor) -> tf.Tensor:
A formatted source ID.
"""
if
source_id
.
dtype
==
tf
.
string
:
source_id
=
tf
.
cast
(
tf
.
strings
.
to_number
(
source_id
)
,
tf
.
int64
)
source_id
=
tf
.
strings
.
to_number
(
source_id
,
tf
.
int64
)
with
tf
.
control_dependencies
([
source_id
]):
source_id
=
tf
.
cond
(
pred
=
tf
.
equal
(
tf
.
size
(
input
=
source_id
),
0
),
...
...
official/vision/beta/dataloaders/video_input.py
View file @
09d9656f
...
...
@@ -361,7 +361,7 @@ class Parser(parser.Parser):
audio
=
decoded_tensors
[
self
.
_audio_feature
]
audio
=
tf
.
cast
(
audio
,
dtype
=
self
.
_dtype
)
audio
=
preprocess_ops_3d
.
sample_sequence
(
audio
,
20
,
random
=
False
,
stride
=
1
)
audio
,
self
.
_audio_shape
[
0
]
,
random
=
False
,
stride
=
1
)
audio
=
tf
.
ensure_shape
(
audio
,
self
.
_audio_shape
)
features
[
'audio'
]
=
audio
...
...
official/vision/beta/evaluation/coco_utils.py
View file @
09d9656f
...
...
@@ -212,6 +212,8 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
gt_annotations
=
[]
num_batches
=
len
(
groundtruths
[
'source_id'
])
for
i
in
range
(
num_batches
):
logging
.
info
(
'convert_groundtruths_to_coco_dataset: Processing annotation %d'
,
i
)
max_num_instances
=
groundtruths
[
'classes'
][
i
].
shape
[
1
]
batch_size
=
groundtruths
[
'source_id'
][
i
].
shape
[
0
]
for
j
in
range
(
batch_size
):
...
...
@@ -259,6 +261,10 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
np_mask
[
np_mask
>
0
]
=
255
encoded_mask
=
mask_api
.
encode
(
np
.
asfortranarray
(
np_mask
))
ann
[
'segmentation'
]
=
encoded_mask
# Ensure the content of `counts` is JSON serializable string.
if
'counts'
in
ann
[
'segmentation'
]:
ann
[
'segmentation'
][
'counts'
]
=
six
.
ensure_str
(
ann
[
'segmentation'
][
'counts'
])
if
'areas'
not
in
groundtruths
:
ann
[
'area'
]
=
mask_api
.
area
(
encoded_mask
)
gt_annotations
.
append
(
ann
)
...
...
@@ -283,11 +289,13 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
class
COCOGroundtruthGenerator
:
"""Generates the groundtruth annotations from a single example."""
def
__init__
(
self
,
file_pattern
,
file_type
,
num_examples
,
include_mask
):
def
__init__
(
self
,
file_pattern
,
file_type
,
num_examples
,
include_mask
,
regenerate_source_id
=
False
):
self
.
_file_pattern
=
file_pattern
self
.
_num_examples
=
num_examples
self
.
_include_mask
=
include_mask
self
.
_dataset_fn
=
dataset_fn
.
pick_dataset_fn
(
file_type
)
self
.
_regenerate_source_id
=
regenerate_source_id
def
_parse_single_example
(
self
,
example
):
"""Parses a single serialized tf.Example proto.
...
...
@@ -312,16 +320,21 @@ class COCOGroundtruthGenerator:
mask of each instance.
"""
decoder
=
tf_example_decoder
.
TfExampleDecoder
(
include_mask
=
self
.
_include_mask
)
include_mask
=
self
.
_include_mask
,
regenerate_source_id
=
self
.
_regenerate_source_id
)
decoded_tensors
=
decoder
.
decode
(
example
)
image
=
decoded_tensors
[
'image'
]
image_size
=
tf
.
shape
(
image
)[
0
:
2
]
boxes
=
box_ops
.
denormalize_boxes
(
decoded_tensors
[
'groundtruth_boxes'
],
image_size
)
source_id
=
decoded_tensors
[
'source_id'
]
if
source_id
.
dtype
is
tf
.
string
:
source_id
=
tf
.
strings
.
to_number
(
source_id
,
out_type
=
tf
.
int64
)
groundtruths
=
{
'source_id'
:
tf
.
strings
.
to_number
(
decoded_tensors
[
'source_id'
],
out_type
=
tf
.
int64
),
'source_id'
:
source_id
,
'height'
:
decoded_tensors
[
'height'
],
'width'
:
decoded_tensors
[
'width'
],
'num_detections'
:
tf
.
shape
(
decoded_tensors
[
'groundtruth_classes'
])[
0
],
...
...
@@ -341,9 +354,10 @@ class COCOGroundtruthGenerator:
dataset
=
tf
.
data
.
Dataset
.
list_files
(
self
.
_file_pattern
,
shuffle
=
False
)
dataset
=
dataset
.
interleave
(
map_func
=
lambda
filename
:
self
.
_dataset_fn
(
filename
).
prefetch
(
1
),
cycle_length
=
12
,
cycle_length
=
None
,
num_parallel_calls
=
tf
.
data
.
experimental
.
AUTOTUNE
)
dataset
=
dataset
.
take
(
self
.
_num_examples
)
dataset
=
dataset
.
map
(
self
.
_parse_single_example
,
num_parallel_calls
=
tf
.
data
.
experimental
.
AUTOTUNE
)
dataset
=
dataset
.
batch
(
1
,
drop_remainder
=
False
)
...
...
@@ -351,18 +365,18 @@ class COCOGroundtruthGenerator:
return
dataset
def
__call__
(
self
):
for
groundtruth_result
in
self
.
_build_pipeline
():
yield
groundtruth_result
return
self
.
_build_pipeline
()
def scan_and_generator_annotation_file(file_pattern: str,
                                       file_type: str,
                                       num_samples: int,
                                       include_mask: bool,
                                       annotation_file: str,
                                       regenerate_source_id: bool = False):
  """Scans a dataset and generates a COCO-style annotation JSON file.

  Args:
    file_pattern: Glob pattern of the input data files.
    file_type: Type of the input files, e.g. 'tfrecord'.
    num_samples: Number of examples to scan from the dataset.
    include_mask: Whether instance masks are decoded and exported.
    annotation_file: Output path of the generated annotation JSON file.
    regenerate_source_id: Whether source ids are regenerated while decoding.
  """
  generator = COCOGroundtruthGenerator(file_pattern, file_type, num_samples,
                                       include_mask, regenerate_source_id)
  generate_annotation_file(generator, annotation_file)
...
...
@@ -371,7 +385,8 @@ def generate_annotation_file(groundtruth_generator,
"""Generates COCO-style annotation JSON file given a groundtruth generator."""
groundtruths
=
{}
logging
.
info
(
'Loading groundtruth annotations from dataset to memory...'
)
for
groundtruth
in
groundtruth_generator
():
for
i
,
groundtruth
in
enumerate
(
groundtruth_generator
()):
logging
.
info
(
'generate_annotation_file: Processing annotation %d'
,
i
)
for
k
,
v
in
six
.
iteritems
(
groundtruth
):
if
k
not
in
groundtruths
:
groundtruths
[
k
]
=
[
v
]
...
...
official/vision/beta/evaluation/coco_utils_test.py
0 → 100644
View file @
09d9656f
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for coco_utils."""
import
os
import
tensorflow
as
tf
from
official.vision.beta.dataloaders
import
tfexample_utils
from
official.vision.beta.evaluation
import
coco_utils
class CocoUtilsTest(tf.test.TestCase):
  """Tests for COCO annotation-file generation utilities."""

  def test_scan_and_generator_annotation_file(self):
    """Scans a dumped tfrecord and checks the annotation file is written."""
    num_samples = 10
    example = tfexample_utils.create_detection_test_example(
        image_height=512, image_width=512, image_channel=3, num_instances=10)
    tf_examples = [example] * num_samples
    data_file = os.path.join(self.create_tempdir(), 'test.tfrecord')
    tfexample_utils.dump_to_tfrecord(
        record_file=data_file, tf_examples=tf_examples)
    annotation_file = os.path.join(self.create_tempdir(), 'annotation.json')

    coco_utils.scan_and_generator_annotation_file(
        file_pattern=data_file,
        file_type='tfrecord',
        num_samples=num_samples,
        include_mask=True,
        annotation_file=annotation_file)
    # NOTE: the original msg lacked the `f` prefix, so the placeholder was
    # never interpolated; fixed to an f-string.
    self.assertTrue(
        tf.io.gfile.exists(annotation_file),
        msg=f'Annotation file {annotation_file} does not exist.')


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/evaluation/iou_test.py
View file @
09d9656f
...
...
@@ -95,5 +95,21 @@ class MeanIoUTest(tf.test.TestCase):
expected_result
=
[
0
,
1
/
(
1
+
1
-
1
)]
self
.
assertAllClose
(
expected_result
,
result
,
atol
=
1e-3
)
def
test_update_state_annd_result
(
self
):
y_pred
=
[
0
,
1
,
0
,
1
]
y_true
=
[
0
,
0
,
1
,
1
]
m_obj
=
iou
.
PerClassIoU
(
num_classes
=
2
)
m_obj
.
update_state
(
y_true
,
y_pred
)
result
=
m_obj
.
result
()
# cm = [[1, 1],
# [1, 1]]
# sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
# iou = true_positives / (sum_row + sum_col - true_positives))
expected_result
=
[
1
/
(
2
+
2
-
1
),
1
/
(
2
+
2
-
1
)]
self
.
assertAllClose
(
expected_result
,
result
,
atol
=
1e-3
)
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/vision/beta/evaluation/panoptic_quality_evaluator_test.py
View file @
09d9656f
...
...
@@ -45,19 +45,25 @@ class PanopticQualityEvaluatorTest(tf.test.TestCase):
dtype
=
np
.
uint16
)
groundtruths
=
{
'category_mask'
:
tf
.
convert_to_tensor
(
category_mask
),
'instance_mask'
:
tf
.
convert_to_tensor
(
groundtruth_instance_mask
)
'category_mask'
:
tf
.
convert_to_tensor
([
category_mask
]),
'instance_mask'
:
tf
.
convert_to_tensor
([
groundtruth_instance_mask
]),
'image_info'
:
tf
.
convert_to_tensor
([[[
6
,
6
],
[
6
,
6
],
[
1.0
,
1.0
],
[
0
,
0
]]],
dtype
=
tf
.
float32
)
}
predictions
=
{
'category_mask'
:
tf
.
convert_to_tensor
(
category_mask
),
'instance_mask'
:
tf
.
convert_to_tensor
(
good_det_instance_mask
)
'category_mask'
:
tf
.
convert_to_tensor
(
[
category_mask
]
),
'instance_mask'
:
tf
.
convert_to_tensor
(
[
good_det_instance_mask
]
)
}
pq_evaluator
=
panoptic_quality_evaluator
.
PanopticQualityEvaluator
(
num_categories
=
1
,
ignored_label
=
2
,
max_instances_per_category
=
16
,
offset
=
16
)
offset
=
16
,
rescale_predictions
=
True
)
for
_
in
range
(
2
):
pq_evaluator
.
update_state
(
groundtruths
,
predictions
)
...
...
@@ -70,7 +76,7 @@ class PanopticQualityEvaluatorTest(tf.test.TestCase):
[
1
,
1
,
1
,
1
,
1
,
1
],
],
dtype
=
np
.
uint16
)
predictions
[
'instance_mask'
]
=
tf
.
convert_to_tensor
(
bad_det_instance_mask
)
predictions
[
'instance_mask'
]
=
tf
.
convert_to_tensor
(
[
bad_det_instance_mask
]
)
for
_
in
range
(
2
):
pq_evaluator
.
update_state
(
groundtruths
,
predictions
)
...
...
official/vision/beta/evaluation/segmentation_metrics.py
View file @
09d9656f
...
...
@@ -41,8 +41,7 @@ class MeanIoU(tf.keras.metrics.MeanIoU):
dtype: data type of the metric result.
"""
self
.
_rescale_predictions
=
rescale_predictions
super
(
MeanIoU
,
self
).
__init__
(
num_classes
=
num_classes
,
name
=
name
,
dtype
=
dtype
)
super
().
__init__
(
num_classes
=
num_classes
,
name
=
name
,
dtype
=
dtype
)
def
update_state
(
self
,
y_true
,
y_pred
):
"""Updates metric state.
...
...
@@ -120,8 +119,7 @@ class MeanIoU(tf.keras.metrics.MeanIoU):
flatten_masks
=
tf
.
reshape
(
masks
,
shape
=
[
-
1
])
flatten_valid_masks
=
tf
.
reshape
(
valid_masks
,
shape
=
[
-
1
])
super
(
MeanIoU
,
self
).
update_state
(
flatten_masks
,
flatten_predictions
,
super
().
update_state
(
flatten_masks
,
flatten_predictions
,
tf
.
cast
(
flatten_valid_masks
,
tf
.
float32
))
...
...
@@ -148,8 +146,7 @@ class PerClassIoU(iou.PerClassIoU):
dtype: data type of the metric result.
"""
self
.
_rescale_predictions
=
rescale_predictions
super
(
PerClassIoU
,
self
).
__init__
(
num_classes
=
num_classes
,
name
=
name
,
dtype
=
dtype
)
super
().
__init__
(
num_classes
=
num_classes
,
name
=
name
,
dtype
=
dtype
)
def
update_state
(
self
,
y_true
,
y_pred
):
"""Updates metric state.
...
...
@@ -213,8 +210,7 @@ class PerClassIoU(iou.PerClassIoU):
flatten_predictions
=
tf
.
reshape
(
predicted_mask
,
shape
=
[
1
,
-
1
])
flatten_masks
=
tf
.
reshape
(
mask
,
shape
=
[
1
,
-
1
])
flatten_valid_masks
=
tf
.
reshape
(
valid_mask
,
shape
=
[
1
,
-
1
])
super
(
PerClassIoU
,
self
).
update_state
(
flatten_masks
,
flatten_predictions
,
super
().
update_state
(
flatten_masks
,
flatten_predictions
,
tf
.
cast
(
flatten_valid_masks
,
tf
.
float32
))
else
:
...
...
@@ -227,6 +223,5 @@ class PerClassIoU(iou.PerClassIoU):
flatten_masks
=
tf
.
reshape
(
masks
,
shape
=
[
-
1
])
flatten_valid_masks
=
tf
.
reshape
(
valid_masks
,
shape
=
[
-
1
])
super
(
PerClassIoU
,
self
).
update_state
(
flatten_masks
,
flatten_predictions
,
super
().
update_state
(
flatten_masks
,
flatten_predictions
,
tf
.
cast
(
flatten_valid_masks
,
tf
.
float32
))
official/vision/beta/evaluation/segmentation_metrics_test.py
0 → 100644
View file @
09d9656f
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for segmentation_metrics."""
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.evaluation
import
segmentation_metrics
class SegmentationMetricsTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for MeanIoU and PerClassIoU segmentation metrics."""

  def _create_test_data(self):
    """Builds a [1, 3, 3, 2] prediction and a [1, 6, 6, 1] groundtruth.

    The groundtruth is twice the prediction's spatial size so that the
    `rescale_predictions=True` path is exercised via `image_info`.

    Returns:
      A tuple of (y_pred, y_true) as expected by the metrics' update_state.
    """
    y_pred_cls0 = np.expand_dims(
        np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=np.uint16),
        axis=(0, -1))
    y_pred_cls1 = np.expand_dims(
        np.array([[0, 0, 0], [0, 0, 1], [0, 0, 1]], dtype=np.uint16),
        axis=(0, -1))
    y_pred = np.concatenate((y_pred_cls0, y_pred_cls1), axis=-1)
    y_true = {
        'masks':
            np.expand_dims(
                np.array([[0, 0, 0, 0, 0, 0],
                          [0, 0, 0, 0, 0, 0],
                          [0, 0, 0, 0, 0, 0],
                          [0, 0, 0, 1, 1, 1],
                          [0, 0, 0, 1, 1, 1],
                          [0, 0, 0, 1, 1, 1]],
                         dtype=np.uint16),
                axis=(0, -1)),
        'valid_masks':
            np.ones([1, 6, 6, 1], dtype=np.uint16),
        # image_info rows: input size, desired size, scale, offset.
        'image_info':
            np.array([[[6, 6], [3, 3], [0.5, 0.5], [0, 0]]], dtype=np.float32)
    }
    return y_pred, y_true

  @parameterized.parameters(True, False)
  def test_mean_iou_metric(self, rescale_predictions):
    # `tf.config.experimental_run_functions_eagerly` is deprecated; use the
    # non-experimental API.
    tf.config.run_functions_eagerly(True)
    mean_iou_metric = segmentation_metrics.MeanIoU(
        num_classes=2, rescale_predictions=rescale_predictions)
    y_pred, y_true = self._create_test_data()
    # Disable autograph for correct coverage statistics.
    update_fn = tf.autograph.experimental.do_not_convert(
        mean_iou_metric.update_state)
    update_fn(y_true=y_true, y_pred=y_pred)
    miou = mean_iou_metric.result()
    self.assertAlmostEqual(miou.numpy(), 0.762, places=3)

  @parameterized.parameters(True, False)
  def test_per_class_mean_iou_metric(self, rescale_predictions):
    per_class_iou_metric = segmentation_metrics.PerClassIoU(
        num_classes=2, rescale_predictions=rescale_predictions)
    y_pred, y_true = self._create_test_data()
    # Disable autograph for correct coverage statistics.
    update_fn = tf.autograph.experimental.do_not_convert(
        per_class_iou_metric.update_state)
    update_fn(y_true=y_true, y_pred=y_pred)
    per_class_miou = per_class_iou_metric.result()
    self.assertAllClose(per_class_miou.numpy(), [0.857, 0.667], atol=1e-3)


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/losses/segmentation_losses.py
View file @
09d9656f
...
...
@@ -17,6 +17,8 @@
# Import libraries
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
EPSILON
=
1e-5
...
...
@@ -87,3 +89,46 @@ class SegmentationLoss:
loss
=
tf
.
reduce_sum
(
top_k_losses
)
/
normalizer
return
loss
def get_actual_mask_scores(logits, labels, ignore_label):
  """Computes per-class IoU between predicted and groundtruth masks.

  Args:
    logits: A [batch, height, width, num_classes] float tensor of
      segmentation logits. Gradients are stopped through it.
    labels: A groundtruth label map; resized with nearest neighbor to the
      spatial size of `logits` before comparison.
    ignore_label: Integer label value excluded from the computation.

  Returns:
    A [batch, num_classes] float32 tensor of IoU scores.
  """
  _, height, width, num_classes = logits.get_shape().as_list()
  batch_size = tf.shape(logits)[0]
  # These scores supervise a separate head, so no gradient flows back.
  logits = tf.stop_gradient(logits)
  labels = tf.image.resize(
      labels, (height, width), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
  predicted_labels = tf.argmax(logits, -1, output_type=tf.int32)

  flat_predictions = tf.reshape(predicted_labels, [batch_size, -1])
  flat_labels = tf.cast(tf.reshape(labels, [batch_size, -1]), tf.int32)
  one_hot_predictions = tf.one_hot(
      flat_predictions, num_classes, on_value=True, off_value=False)
  one_hot_labels = tf.one_hot(
      flat_labels, num_classes, on_value=True, off_value=False)
  # Pixels whose groundtruth equals `ignore_label` contribute neither to
  # the intersection nor to the union.
  keep_mask = tf.expand_dims(tf.not_equal(flat_labels, ignore_label), 2)

  overlap = tf.logical_and(
      tf.logical_and(one_hot_predictions, one_hot_labels), keep_mask)
  intersection = tf.reduce_sum(tf.cast(overlap, tf.float32), axis=1)
  joined = tf.logical_and(
      tf.logical_or(one_hot_predictions, one_hot_labels), keep_mask)
  union = tf.reduce_sum(tf.cast(joined, tf.float32), axis=1)
  # EPSILON guards against division by zero when a class is absent.
  return tf.divide(intersection, tf.maximum(union, EPSILON))
class MaskScoringLoss:
  """Mask Scoring loss.

  Regresses predicted mask quality scores towards the actual IoU between
  the predicted and groundtruth masks.
  """

  def __init__(self, ignore_label):
    # Label value excluded when computing the target IoU scores.
    self._ignore_label = ignore_label
    # Reduction is deferred to safe_mean in __call__.
    self._mse_loss = tf.keras.losses.MeanSquaredError(
        reduction=tf.keras.losses.Reduction.NONE)

  def __call__(self, predicted_scores, logits, labels):
    target_scores = get_actual_mask_scores(logits, labels, self._ignore_label)
    per_example_loss = self._mse_loss(target_scores, predicted_scores)
    return tf_utils.safe_mean(per_example_loss)
official/vision/beta/modeling/backbones/__init__.py
View file @
09d9656f
...
...
@@ -16,6 +16,7 @@
"""Backbones package definition."""
from
official.vision.beta.modeling.backbones.efficientnet
import
EfficientNet
from
official.vision.beta.modeling.backbones.mobiledet
import
MobileDet
from
official.vision.beta.modeling.backbones.mobilenet
import
MobileNet
from
official.vision.beta.modeling.backbones.resnet
import
ResNet
from
official.vision.beta.modeling.backbones.resnet_3d
import
ResNet3D
...
...
official/vision/beta/modeling/backbones/factory_test.py
View file @
09d9656f
...
...
@@ -189,6 +189,40 @@ class FactoryTest(tf.test.TestCase, parameterized.TestCase):
norm_momentum
=
0.99
,
norm_epsilon
=
1e-5
)
@
combinations
.
generate
(
combinations
.
combine
(
model_id
=
[
'MobileDetCPU'
,
'MobileDetDSP'
,
'MobileDetEdgeTPU'
,
'MobileDetGPU'
],
filter_size_scale
=
[
1.0
,
0.75
],
))
def
test_mobiledet_creation
(
self
,
model_id
,
filter_size_scale
):
"""Test creation of Mobiledet models."""
network
=
backbones
.
MobileDet
(
model_id
=
model_id
,
filter_size_scale
=
filter_size_scale
,
norm_momentum
=
0.99
,
norm_epsilon
=
1e-5
)
backbone_config
=
backbones_cfg
.
Backbone
(
type
=
'mobiledet'
,
mobiledet
=
backbones_cfg
.
MobileDet
(
model_id
=
model_id
,
filter_size_scale
=
filter_size_scale
))
norm_activation_config
=
common_cfg
.
NormActivation
(
norm_momentum
=
0.99
,
norm_epsilon
=
1e-5
,
use_sync_bn
=
False
)
factory_network
=
factory
.
build_backbone
(
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
backbone_config
=
backbone_config
,
norm_activation_config
=
norm_activation_config
)
network_config
=
network
.
get_config
()
factory_network_config
=
factory_network
.
get_config
()
self
.
assertEqual
(
network_config
,
factory_network_config
)
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
Prev
1
…
13
14
15
16
17
18
19
20
21
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment