ModelZoo / ResNet50_tensorflow · Commit 2412b118
"docs/vscode:/vscode.git/clone" did not exist on "c81dddb45c71e630b907f9d84686ecd73b4105c7"
Commit 2412b118, authored Jul 02, 2022 by Gunho Park

Merge branch 'master' of https://github.com/tensorflow/models

Parents: f7783e7a, 6dbdb08c
The commit touches 33 files in total; this page shows 13 changed files with 240 additions and 65 deletions (+240 −65).
official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab_test.py  +45 −0
official/vision/configs/experiments/image_classification/imagenet_mobilenetv3large_tpu.yaml  +5 −5
official/vision/configs/experiments/image_classification/imagenet_mobilenetv3small_tpu.yaml  +4 −4
official/vision/configs/retinanet.py  +1 −0
official/vision/dataloaders/classification_input.py  +5 −0
official/vision/modeling/factory.py  +1 −0
official/vision/modeling/heads/dense_prediction_heads.py  +9 −1
official/vision/modeling/heads/dense_prediction_heads_test.py  +8 −6
official/vision/modeling/maskrcnn_model.py  +1 −1
official/vision/ops/preprocess_ops.py  +47 −0
official/vision/ops/preprocess_ops_3d.py  +95 −48
official/vision/ops/preprocess_ops_3d_test.py  +10 −0
research/object_detection/meta_architectures/center_net_meta_arch.py  +9 −0
official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab_test.py (new file, mode 100644, +45 −0)
```python
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for panoptic deeplab config."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as exp_cfg


class PanopticMaskRCNNConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('panoptic_deeplab_resnet_coco', 'dilated_resnet'),
      ('panoptic_deeplab_mobilenetv3_large_coco', 'mobilenet'),
  )
  def test_panoptic_deeplab_configs(self, config_name, backbone_type):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.PanopticDeeplabTask)
    self.assertIsInstance(config.task.model, exp_cfg.PanopticDeeplab)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    self.assertEqual(config.task.model.backbone.type, backbone_type)
    config.validate()

    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
```
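For context on what the test drives: experiment names resolve through the registry in `official.core.exp_factory`. A minimal sketch, assuming (as the test does) that importing the panoptic_deeplab configs module registers the experiments at import time:

```python
# Minimal sketch: resolve a registered experiment config and validate it.
# The import is what registers 'panoptic_deeplab_resnet_coco'; this mirrors
# the test above rather than adding new behavior.
from official.core import exp_factory
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab  # pylint: disable=unused-import

config = exp_factory.get_exp_config('panoptic_deeplab_resnet_coco')
print(config.task.model.backbone.type)  # expected: 'dilated_resnet'
config.validate()  # raises KeyError if restrictions are inconsistent
```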
official/vision/configs/experiments/image_classification/imagenet_mobilenetv3large_tpu.yaml (+5 −5)
```diff
-# MobileNetV3-large_1.0 ImageNet classification: ~75.3% top-1.
+# MobileNetV3-large_1.0 ImageNet classification: ~75.7% top-1.
 runtime:
   distribution_strategy: 'tpu'
   mixed_precision_dtype: 'bfloat16'
@@ -27,10 +27,10 @@ task:
     dtype: 'bfloat16'
     aug_type:
       autoaug:
-        augmentation_name: v0
+        augmentation_name: 'v0'
         cutout_const: 100
         translate_const: 250
-      type: autoaug
+      type: 'autoaug'
   validation_data:
     input_path: 'imagenet-2012-tfrecord/valid*'
     is_training: false
@@ -38,7 +38,7 @@ task:
     dtype: 'bfloat16'
     drop_remainder: false
 trainer:
-  train_steps: 156000  # 500 epochs
+  train_steps: 218000  # 700 epochs
   validation_steps: 13
   validation_interval: 312
   steps_per_loop: 312  # NUM_EXAMPLES (1281167) // global_batch_size
@@ -48,7 +48,7 @@ trainer:
   learning_rate:
     cosine:
       alpha: 0.0
-      decay_steps: 156000
+      decay_steps: 218000
       initial_learning_rate: 0.004
       name: CosineDecay
       offset: 0
```
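The step counts are consistent with the `steps_per_loop` comment: at 312 steps per epoch, the old 156000 steps is 500 epochs and the new 218000 is roughly 700. A quick check, assuming the global batch size of 4096 implied by 1281167 // batch_size = 312 (the batch size itself is not stated in this hunk):

```python
# Sanity check of the epoch arithmetic in the hunk above.
num_examples = 1281167
global_batch_size = 4096  # assumption inferred from the steps_per_loop comment
steps_per_epoch = num_examples // global_batch_size
assert steps_per_epoch == 312
print(156000 / steps_per_epoch)  # 500.0 epochs (old value)
print(218000 / steps_per_epoch)  # ~698.7, i.e. ~700 epochs (new value)
```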
official/vision/configs/experiments/image_classification/imagenet_mobilenetv3small_tpu.yaml (+4 −4)
```diff
-# MobileNetV3Small ImageNet classification. 67.5% top-1 and 87.6% top-5 accuracy.
+# MobileNetV3Small ImageNet classification. 67.5% top-1 and 87.7% top-5 accuracy.
 runtime:
   distribution_strategy: 'tpu'
   mixed_precision_dtype: 'bfloat16'
@@ -34,7 +34,7 @@ task:
     drop_remainder: false
 trainer:
   train_steps: 312000  # 1000 epochs
-  validation_steps: 12
+  validation_steps: 13
   validation_interval: 312
   steps_per_loop: 312  # NUM_EXAMPLES (1281167) // global_batch_size
   summary_interval: 312
@@ -49,7 +49,7 @@ trainer:
   learning_rate:
     type: 'exponential'
     exponential:
-      initial_learning_rate: 0.01
+      initial_learning_rate: 0.426  # 0.02 * (batch_size / 192)
      decay_steps: 936  # 3 * steps_per_epoch
      decay_rate: 0.99
      staircase: true
@@ -60,4 +60,4 @@ trainer:
     type: 'linear'
     linear:
       warmup_steps: 1560
-      warmup_learning_rate: 0.001
+      warmup_learning_rate: 0.0
```
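The new initial learning rate follows the scaling rule in its own comment; with the same implied global batch size of 4096:

```python
# 0.02 * (batch_size / 192) with batch_size = 4096 (assumption, inferred from
# the 312 steps/epoch comment as above).
print(0.02 * (4096 / 192))  # 0.42666..., stored as 0.426 in the config
```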
official/vision/configs/retinanet.py (+1 −0)
```diff
@@ -107,6 +107,7 @@ class RetinaNetHead(hyperparams.Config):
   num_filters: int = 256
   use_separable_conv: bool = False
   attribute_heads: List[AttributeHead] = dataclasses.field(default_factory=list)
+  share_classification_heads: bool = False


 @dataclasses.dataclass
```
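Since the new field is a plain dataclass attribute on the `RetinaNetHead` config, callers opt in the usual way; a small illustrative sketch (only the field name and its `False` default come from the diff, the construction call is an assumption):

```python
# Illustrative: the flag defaults to False and is set like any other field.
from official.vision.configs import retinanet as retinanet_cfg

head_cfg = retinanet_cfg.RetinaNetHead(share_classification_heads=True)
assert retinanet_cfg.RetinaNetHead().share_classification_heads is False
```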
official/vision/dataloaders/classification_input.py (+5 −0)
```diff
@@ -254,6 +254,11 @@ class Parser(parser.Parser):
     return image

+  def parse_train_image(self, decoded_tensors: Dict[str,
+                                                    tf.Tensor]) -> tf.Tensor:
+    """Public interface for parsing image data for training."""
+    return self._parse_train_image(decoded_tensors)
+
   @classmethod
   def inference_fn(cls,
                    image: tf.Tensor,
```
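The added method only re-exports the private `_parse_train_image`, so external code can run the training-image pipeline on already-decoded tensors. A hedged usage sketch; the `Parser` constructor arguments and the `'image/encoded'` key are assumptions, not part of this diff:

```python
# Hypothetical usage of the new public method (constructor args assumed).
import tensorflow as tf
from official.vision.dataloaders import classification_input

parser = classification_input.Parser(output_size=[224, 224], num_classes=1001)
decoded = {'image/encoded': tf.io.read_file('/tmp/example.jpg')}
train_image = parser.parse_train_image(decoded)  # augmented training image
```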
official/vision/modeling/factory.py (+1 −0)
```diff
@@ -293,6 +293,7 @@ def build_retinanet(
       attribute_heads=[
           cfg.as_dict() for cfg in (head_config.attribute_heads or [])
       ],
+      share_classification_heads=head_config.share_classification_heads,
       use_separable_conv=head_config.use_separable_conv,
       activation=norm_activation_config.activation,
       use_sync_bn=norm_activation_config.use_sync_bn,
```
official/vision/modeling/heads/dense_prediction_heads.py (+9 −1)
```diff
@@ -37,6 +37,7 @@ class RetinaNetHead(tf.keras.layers.Layer):
       num_convs: int = 4,
       num_filters: int = 256,
       attribute_heads: Optional[List[Dict[str, Any]]] = None,
+      share_classification_heads: bool = False,
       use_separable_conv: bool = False,
       activation: str = 'relu',
       use_sync_bn: bool = False,
@@ -62,6 +63,8 @@ class RetinaNetHead(tf.keras.layers.Layer):
         additional attribute head. Each dict consists of 3 key-value pairs:
         `name`, `type` ('regression' or 'classification'), and `size` (number
         of predicted values for each instance).
+      share_classification_heads: A `bool` that indicates whether to share
+        weights among the main and attribute classification heads.
       use_separable_conv: A `bool` that indicates whether separable
         convolution layers are used.
       activation: A `str` that indicates which activation is used, e.g. 'relu',
@@ -88,6 +91,7 @@ class RetinaNetHead(tf.keras.layers.Layer):
       'num_convs': num_convs,
       'num_filters': num_filters,
       'attribute_heads': attribute_heads,
+      'share_classification_heads': share_classification_heads,
       'use_separable_conv': use_separable_conv,
       'activation': activation,
       'use_sync_bn': use_sync_bn,
@@ -216,7 +220,11 @@ class RetinaNetHead(tf.keras.layers.Layer):
         this_level_att_norms = []
         for i in range(self._config_dict['num_convs']):
           if level == self._config_dict['min_level']:
-            att_conv_name = '{}-conv_{}'.format(att_name, i)
+            if (self._config_dict['share_classification_heads'] and
+                att_type == 'classification'):
+              att_conv_name = 'classnet-conv_{}'.format(i)
+            else:
+              att_conv_name = '{}-conv_{}'.format(att_name, i)
             if 'kernel_initializer' in conv_kwargs:
               conv_kwargs['kernel_initializer'] = tf_utils.clone_initializer(
                   conv_kwargs['kernel_initializer'])
```
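The sharing mechanism is name-based: when the flag is on, a classification-type attribute head builds its convs under the main classifier's `classnet-conv_{i}` names instead of its own `{att_name}-conv_{i}` names, which is what lets the head reuse those layers' weights. A standalone sketch of just the naming rule (illustrative helper, not library code):

```python
# Hypothetical helper isolating the naming rule from the hunk above.
def att_conv_name(att_name, att_type, i, share_classification_heads):
  if share_classification_heads and att_type == 'classification':
    return 'classnet-conv_{}'.format(i)  # reuse main classifier's layer name
  return '{}-conv_{}'.format(att_name, i)  # per-attribute layer name

print(att_conv_name('depth', 'regression', 0, True))      # depth-conv_0
print(att_conv_name('label', 'classification', 0, True))  # classnet-conv_0
```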
official/vision/modeling/heads/dense_prediction_heads_test.py (+8 −6)
```diff
@@ -25,14 +25,15 @@ from official.vision.modeling.heads import dense_prediction_heads
 class RetinaNetHeadTest(parameterized.TestCase, tf.test.TestCase):

   @parameterized.parameters(
-      (False, False, False),
-      (False, True, False),
-      (True, False, True),
-      (True, True, True),
+      (False, False, False, None, False),
+      (False, True, False, None, False),
+      (True, False, True, 'regression', False),
+      (True, True, True, 'classification', True),
   )
-  def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads):
+  def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads,
+                   att_type, share_classification_heads):
     if has_att_heads:
-      attribute_heads = [dict(name='depth', type='regression', size=1)]
+      attribute_heads = [dict(name='depth', type=att_type, size=1)]
     else:
       attribute_heads = None
@@ -44,6 +45,7 @@ class RetinaNetHeadTest(parameterized.TestCase, tf.test.TestCase):
         num_convs=2,
         num_filters=256,
         attribute_heads=attribute_heads,
+        share_classification_heads=share_classification_heads,
         use_separable_conv=use_separable_conv,
         activation='relu',
         use_sync_bn=use_sync_bn,
```
official/vision/modeling/maskrcnn_model.py (+1 −1)
```diff
@@ -158,7 +158,7 @@ class MaskRCNNModel(tf.keras.Model):
           matched_gt_classes=intermediate_outputs['matched_gt_classes'],
           gt_masks=gt_masks,
           training=training)
-      model_outputs.update(model_mask_outputs)
+      model_outputs.update(model_mask_outputs)  # pytype: disable=attribute-error  # dynamic-method-lookup
     return model_outputs

   def _get_backbone_and_decoder_features(self, images):
```
official/vision/ops/preprocess_ops.py (+47 −0)
```diff
@@ -638,6 +638,53 @@ def random_horizontal_flip(image, normalized_boxes=None, masks=None, seed=1):
   return image, normalized_boxes, masks


+def random_horizontal_flip_with_roi(
+    image: tf.Tensor,
+    boxes: Optional[tf.Tensor] = None,
+    masks: Optional[tf.Tensor] = None,
+    roi_boxes: Optional[tf.Tensor] = None,
+    seed: int = 1
+) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor],
+           Optional[tf.Tensor]]:
+  """Randomly flips input image and bounding boxes.
+
+  Extends preprocess_ops.random_horizontal_flip to also flip roi_boxes used
+  by ViLD.
+
+  Args:
+    image: `tf.Tensor`, the image to apply the random flip.
+    boxes: `tf.Tensor` or `None`, boxes corresponding to the image.
+    masks: `tf.Tensor` or `None`, masks corresponding to the image.
+    roi_boxes: `tf.Tensor` or `None`, RoIs corresponding to the image.
+    seed: Seed for Tensorflow's random number generator.
+
+  Returns:
+    image: `tf.Tensor`, flipped image.
+    boxes: `tf.Tensor` or `None`, flipped boxes corresponding to the image.
+    masks: `tf.Tensor` or `None`, flipped masks corresponding to the image.
+    roi_boxes: `tf.Tensor` or `None`, flipped RoIs corresponding to the image.
+  """
+  with tf.name_scope('random_horizontal_flip'):
+    do_flip = tf.greater(tf.random.uniform([], seed=seed), 0.5)
+
+    image = tf.cond(do_flip, lambda: horizontal_flip_image(image),
+                    lambda: image)
+
+    if boxes is not None:
+      boxes = tf.cond(do_flip, lambda: horizontal_flip_boxes(boxes),
+                      lambda: boxes)
+
+    if masks is not None:
+      masks = tf.cond(do_flip, lambda: horizontal_flip_masks(masks),
+                      lambda: masks)
+
+    if roi_boxes is not None:
+      roi_boxes = tf.cond(do_flip, lambda: horizontal_flip_boxes(roi_boxes),
+                          lambda: roi_boxes)
+
+    return image, boxes, masks, roi_boxes
+
+
 def color_jitter(image: tf.Tensor,
                  brightness: Optional[float] = 0.,
                  contrast: Optional[float] = 0.,
```
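A hedged usage sketch of the new op; the box layout ([ymin, xmin, ymax, xmax], normalized) follows the conventions used elsewhere in `preprocess_ops`, and the concrete tensors are illustrative:

```python
import tensorflow as tf
from official.vision.ops import preprocess_ops

image = tf.zeros([256, 256, 3])
boxes = tf.constant([[0.1, 0.2, 0.5, 0.6]])      # [N, 4], normalized (assumed)
roi_boxes = tf.constant([[0.0, 0.0, 1.0, 0.4]])  # ViLD RoIs, same layout

image, boxes, _, roi_boxes = preprocess_ops.random_horizontal_flip_with_roi(
    image, boxes=boxes, roi_boxes=roi_boxes, seed=1)
# When the flip fires, x-coordinates mirror: xmin' = 1 - xmax, xmax' = 1 - xmin,
# applied identically to boxes and roi_boxes.
```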
official/vision/ops/preprocess_ops_3d.py (+95 −48)
```diff
@@ -18,8 +18,7 @@ from typing import Optional, Tuple
 import tensorflow as tf


-def _sample_or_pad_sequence_indices(sequence: tf.Tensor,
-                                    num_steps: int,
+def _sample_or_pad_sequence_indices(sequence: tf.Tensor, num_steps: int,
                                     stride: int,
                                     offset: tf.Tensor) -> tf.Tensor:
   """Returns indices to take for sampling or padding sequences to fixed size."""
@@ -28,18 +27,16 @@ def _sample_or_pad_sequence_indices(sequence: tf.Tensor,
   # Repeats sequence until num_steps are available in total.
   max_length = num_steps * stride + offset
-  num_repeats = tf.math.floordiv(
-      max_length + sequence_length - 1,
-      sequence_length)
+  num_repeats = tf.math.floordiv(max_length + sequence_length - 1,
+                                 sequence_length)
   sel_idx = tf.tile(sel_idx, [num_repeats])
   steps = tf.range(offset, offset + num_steps * stride, stride)
   return tf.gather(sel_idx, steps)


-def sample_linspace_sequence(sequence: tf.Tensor,
-                             num_windows: int,
-                             num_steps: int,
-                             stride: int) -> tf.Tensor:
+def sample_linspace_sequence(sequence: tf.Tensor, num_windows: int,
+                             num_steps: int, stride: int) -> tf.Tensor:
   """Samples `num_windows` segments from sequence with linearly spaced offsets.

   The samples are concatenated in a single `tf.Tensor` in order to have the same
@@ -66,11 +63,12 @@ def sample_linspace_sequence(sequence: tf.Tensor,
   all_indices = []
   for i in range(num_windows):
-    all_indices.append(_sample_or_pad_sequence_indices(
-        sequence=sequence,
-        num_steps=num_steps,
-        stride=stride,
-        offset=offsets[i]))
+    all_indices.append(
+        _sample_or_pad_sequence_indices(
+            sequence=sequence,
+            num_steps=num_steps,
+            stride=stride,
+            offset=offsets[i]))
   indices = tf.concat(all_indices, axis=0)
   indices.set_shape((num_windows * num_steps,))
@@ -110,25 +108,76 @@ def sample_sequence(sequence: tf.Tensor,
         sequence_length > (num_steps - 1) * frame_stride,
         lambda: sequence_length - (num_steps - 1) * frame_stride,
         lambda: sequence_length)
-    offset = tf.random.uniform(
-        (),
-        maxval=tf.cast(max_offset, dtype=tf.int32),
-        dtype=tf.int32,
-        seed=seed)
+    offset = tf.random.uniform((),
+                               maxval=tf.cast(max_offset, dtype=tf.int32),
+                               dtype=tf.int32,
+                               seed=seed)
   else:
     offset = (sequence_length - num_steps * stride) // 2
     offset = tf.maximum(0, offset)

   indices = _sample_or_pad_sequence_indices(
-      sequence=sequence,
-      num_steps=num_steps,
-      stride=stride,
-      offset=offset)
+      sequence=sequence, num_steps=num_steps, stride=stride, offset=offset)
   indices.set_shape((num_steps,))

   return tf.gather(sequence, indices)


+def sample_segment_sequence(sequence: tf.Tensor,
+                            num_frames: int,
+                            is_training: bool,
+                            seed: Optional[int] = None) -> tf.Tensor:
+  """Samples a single segment of size `num_frames` from a given sequence.
+
+  This function follows the temporal segment network sampling style
+  (https://arxiv.org/abs/1608.00859). The video sequence would be divided into
+  `num_frames` non-overlapping segments with same length. If `is_training` is
+  `True`, we would randomly sampling one frame for each segment, and when
+  `is_training` is `False`, only the center frame of each segment is sampled.
+
+  Args:
+    sequence: Any tensor where the first dimension is timesteps.
+    num_frames: Number of frames to take.
+    is_training: A boolean indicating sampling in training or evaluation mode.
+    seed: A deterministic seed to use when sampling.
+
+  Returns:
+    A single `tf.Tensor` with first dimension `num_steps` with the sampled
+    segment.
+  """
+  sequence_length = tf.shape(sequence)[0]
+  sequence_length = tf.cast(sequence_length, tf.float32)
+  segment_length = tf.cast(sequence_length // num_frames, tf.float32)
+  segment_indices = tf.linspace(0.0, sequence_length, num_frames + 1)
+  segment_indices = tf.cast(segment_indices, tf.int32)
+
+  if is_training:
+    segment_length = tf.cast(segment_length, tf.int32)
+    # pylint:disable=g-long-lambda
+    segment_offsets = tf.cond(
+        segment_length == 0,
+        lambda: tf.zeros(shape=(num_frames,), dtype=tf.int32),
+        lambda: tf.random.uniform(
+            shape=(num_frames,),
+            minval=0,
+            maxval=segment_length,
+            dtype=tf.int32,
+            seed=seed))
+    # pylint:enable=g-long-lambda
+  else:
+    # Only sampling central frame during inference for being deterministic.
+    segment_offsets = tf.ones(
+        shape=(num_frames,), dtype=tf.int32) * tf.cast(
+            segment_length // 2, dtype=tf.int32)
+
+  indices = segment_indices[:-1] + segment_offsets
+  indices.set_shape((num_frames,))
+
+  return tf.gather(sequence, indices)
+
+
 def decode_jpeg(image_string: tf.Tensor, channels: int = 0) -> tf.Tensor:
   """Decodes JPEG raw bytes string into a RGB uint8 Tensor.
@@ -144,7 +193,9 @@ def decode_jpeg(image_string: tf.Tensor, channels: int = 0) -> tf.Tensor:
   """
   return tf.map_fn(
       lambda x: tf.image.decode_jpeg(x, channels=channels),
-      image_string, back_prop=False, dtype=tf.uint8)
+      image_string,
+      back_prop=False,
+      dtype=tf.uint8)


 def crop_image(frames: tf.Tensor,
@@ -229,8 +280,7 @@ def crop_image(frames: tf.Tensor,
   return frames


-def resize_smallest(frames: tf.Tensor,
-                    min_resize: int) -> tf.Tensor:
+def resize_smallest(frames: tf.Tensor, min_resize: int) -> tf.Tensor:
   """Resizes frames so that min(`height`, `width`) is equal to `min_resize`.

   This function will not do anything if the min(`height`, `width`) is already
@@ -255,18 +305,15 @@ def resize_smallest(frames: tf.Tensor,
     frames_resized = tf.image.resize(frames, (output_h, output_w))
     return tf.cast(frames_resized, frames.dtype)

-  should_resize = tf.math.logical_or(tf.not_equal(input_w, output_w),
-                                     tf.not_equal(input_h, output_h))
+  should_resize = tf.math.logical_or(
+      tf.not_equal(input_w, output_w), tf.not_equal(input_h, output_h))
   frames = tf.cond(should_resize, resize_fn, lambda: frames)

   return frames


-def random_crop_resize(frames: tf.Tensor,
-                       output_h: int,
-                       output_w: int,
-                       num_frames: int,
-                       num_channels: int,
+def random_crop_resize(frames: tf.Tensor, output_h: int, output_w: int,
+                       num_frames: int, num_channels: int,
                        aspect_ratio: Tuple[float, float],
                        area_range: Tuple[float, float]) -> tf.Tensor:
   """First crops clip with jittering and then resizes to (output_h, output_w).
@@ -279,6 +326,7 @@ def random_crop_resize(frames: tf.Tensor,
     num_channels: Number of channels of the clip.
     aspect_ratio: Float tuple with the aspect range for cropping.
     area_range: Float tuple with the area range for cropping.
+
   Returns:
     A Tensor of shape [timesteps, output_h, output_w, channels] of type
       frames.dtype.
@@ -299,21 +347,16 @@ def random_crop_resize(frames: tf.Tensor,
   bbox_begin, bbox_size, _ = sample_distorted_bbox
   offset_y, offset_x, _ = tf.unstack(bbox_begin)
   target_height, target_width, _ = tf.unstack(bbox_size)
-  size = tf.convert_to_tensor((
-      seq_len, target_height, target_width, channels))
-  offset = tf.convert_to_tensor((
-      0, offset_y, offset_x, 0))
+  size = tf.convert_to_tensor((seq_len, target_height, target_width, channels))
+  offset = tf.convert_to_tensor((0, offset_y, offset_x, 0))
   frames = tf.slice(frames, offset, size)
-  frames = tf.cast(
-      tf.image.resize(frames, (output_h, output_w)),
-      frames.dtype)
+  frames = tf.cast(
+      tf.image.resize(frames, (output_h, output_w)), frames.dtype)
   frames.set_shape((num_frames, output_h, output_w, num_channels))
   return frames


 def random_flip_left_right(
     frames: tf.Tensor,
     seed: Optional[int] = None) -> tf.Tensor:
@@ -324,12 +367,16 @@ def random_flip_left_right(
     A Tensor of shape [timesteps, output_h, output_w, channels] eventually
     flipped left right.
   """
-  is_flipped = tf.random.uniform(
-      (), minval=0, maxval=2, dtype=tf.int32, seed=seed)
-  frames = tf.cond(tf.equal(is_flipped, 1),
-                   true_fn=lambda: tf.image.flip_left_right(frames),
-                   false_fn=lambda: frames)
+  is_flipped = tf.random.uniform((),
+                                 minval=0,
+                                 maxval=2,
+                                 dtype=tf.int32,
+                                 seed=seed)
+  frames = tf.cond(
+      tf.equal(is_flipped, 1),
+      true_fn=lambda: tf.image.flip_left_right(frames),
+      false_fn=lambda: frames)
   return frames
```
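A worked example of the new TSN-style sampler: a 100-frame clip divided into 10 segments of length 10. In evaluation mode each segment contributes its center frame, so the result is deterministically [5, 15, ..., 95], the same values the new unit test below asserts:

```python
import tensorflow as tf
from official.vision.ops import preprocess_ops_3d

clip = tf.range(100)  # stand-in for a [timesteps, ...] video tensor
eval_frames = preprocess_ops_3d.sample_segment_sequence(
    clip, num_frames=10, is_training=False)
print(eval_frames.numpy())  # [ 5 15 25 35 45 55 65 75 85 95]

train_frames = preprocess_ops_3d.sample_segment_sequence(
    clip, num_frames=10, is_training=True, seed=1)
# In training mode, each sampled frame falls somewhere inside its own segment.
```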
official/vision/ops/preprocess_ops_3d_test.py (+10 −0)
```diff
@@ -72,6 +72,16 @@ class ParserUtilsTest(tf.test.TestCase):
     self.assertBetween(offset_3, 0, 99)
     self.assertAllEqual(sampled_seq_3, range(offset_3, offset_3 + 10))

+  def test_sample_segment_sequence(self):
+    sequence = tf.range(100)
+    sampled_seq_1 = preprocess_ops_3d.sample_segment_sequence(
+        sequence, 10, False)
+    sampled_seq_2 = preprocess_ops_3d.sample_segment_sequence(
+        sequence, 10, True)
+
+    self.assertAllEqual(sampled_seq_1, [5 + i * 10 for i in range(10)])
+    for idx, v in enumerate(sampled_seq_2):
+      self.assertBetween(v - idx * 10, 0, 10)
+
   def test_decode_jpeg(self):
     # Create a random RGB JPEG image.
     random_image = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8)
```
research/object_detection/meta_architectures/center_net_meta_arch.py (+9 −0)
```diff
@@ -4235,6 +4235,15 @@ class CenterNetMetaArch(model.DetectionModel):
           axis=-2)
       multiclass_scores = postprocess_dict[
           fields.DetectionResultFields.detection_multiclass_scores]
+      num_classes = tf.shape(multiclass_scores)[2]
+      class_mask = tf.cast(
+          tf.one_hot(
+              postprocess_dict[fields.DetectionResultFields.detection_classes],
+              depth=num_classes), tf.bool)
+      # Suppress the scores of unselected classes to zero. Otherwise, the
+      # downstream NMS ops might be confused and introduce issues.
+      multiclass_scores = tf.where(
+          class_mask, multiclass_scores, tf.zeros_like(multiclass_scores))
       num_valid_boxes = postprocess_dict.pop(
           fields.DetectionResultFields.num_detections)
       # Remove scores and classes as NMS will compute these from multiclass
```
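The masking trick in isolation: one-hot the predicted class per box, keep that class's score, and zero out the rest, so downstream NMS sees an unambiguous score tensor. A standalone sketch with assumed shapes (scores [batch, num_boxes, num_classes], classes [batch, num_boxes]):

```python
import tensorflow as tf

scores = tf.constant([[[0.9, 0.4, 0.3]]])  # multiclass scores for one box
classes = tf.constant([[0]])               # predicted class index per box
mask = tf.cast(tf.one_hot(classes, depth=3), tf.bool)
masked = tf.where(mask, scores, tf.zeros_like(scores))
print(masked.numpy())  # [[[0.9 0.  0. ]]]
```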