ModelZoo / ResNet50_tensorflow, commit 130970ca
Authored Feb 04, 2022 by Vighnesh Birodkar
Committed by TF Object Detection Team, Feb 04, 2022

Document use_only_last_stage flag in configs.

PiperOrigin-RevId: 426512912

Parent: e5e8bf3c
Showing 3 changed files, with 45 additions and 32 deletions:

research/object_detection/configs/tf2/center_net_deepmac_512x512_voc_only_tpu-32.config  (+1, -1)
research/object_detection/models/keras_models/resnet_v1.py  (+5, -2)
research/object_detection/protos/center_net.proto  (+39, -29)
research/object_detection/configs/tf2/center_net_deepmac_512x512_voc_only_tpu-32.config
@@ -3,7 +3,6 @@
 # mask head. This config is only trained on masks from the VOC classes in COCO
 # and achieves a mask mAP of 32.5% on non-VOC classes.
 # [1]: https://arxiv.org/abs/2104.00613
 # [2]: https://arxiv.org/abs/1904.07850
 # Train on TPU-32

@@ -55,6 +54,7 @@ model {
       classification_loss {
         weighted_sigmoid {}
       }
+      use_only_last_stage: true
       allowed_masked_classes_ids: [1,  # person
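The config hunk above enables the flag for the 512x512 DeepMAC VOC-only config. As a hedged illustration (not part of this commit), the same flag can also be toggled programmatically with the object_detection config utilities; the paths below are placeholders and the field path model.center_net.deepmac_mask_estimation follows the proto changed later in this commit.

# Sketch only: flip use_only_last_stage on an existing pipeline config.
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file(
    'research/object_detection/configs/tf2/'
    'center_net_deepmac_512x512_voc_only_tpu-32.config')

# The flag lives on the DeepMAC mask-estimation block of the CenterNet model.
configs['model'].center_net.deepmac_mask_estimation.use_only_last_stage = True

pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_proto, '/tmp/deepmac_512x512')  # placeholder output dir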
research/object_detection/models/keras_models/resnet_v1.py
@@ -19,13 +19,16 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-from tensorflow.python.keras.applications import resnet
 import tensorflow.compat.v1 as tf

 from object_detection.core import freezable_batch_norm
 from object_detection.models.keras_models import model_utils

+try:
+  from keras.applications import resnet  # pylint:disable=g-import-not-at-top
+except ImportError:
+  from tensorflow.python.keras.applications import resnet  # pylint:disable=g-import-not-at-top

 def _fixed_padding(inputs, kernel_size, rate=1):  # pylint: disable=invalid-name
   """Pads the input along the spatial dimensions independently of input size.
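The guarded import keeps the module working whether the resnet application code ships with standalone Keras or with the TF-bundled copy. Below is a minimal standalone sketch of the same pattern, assuming only that TensorFlow (and optionally Keras) is installed; the ResNet50 call is just a smoke test and is not code from this commit.

# Prefer the standalone Keras package; fall back to the TF-bundled copy.
try:
  from keras.applications import resnet  # newer environments ship Keras separately
except ImportError:
  from tensorflow.python.keras.applications import resnet  # older TF bundles Keras

# Both import paths expose the same ResNet builders, e.g. ResNet50.
model = resnet.ResNet50(weights=None, include_top=False, input_shape=(224, 224, 3))
print(model.name, len(model.layers))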
research/object_detection/protos/center_net.proto
@@ -220,8 +220,8 @@ message CenterNet {
     // scores * exp((-distances^2) / (2 * sigma^2))
     // where 'distances' is the distance between the heatmap peak location and
     // the regressed joint location and 'sigma' is the Gaussian standard
-    // deviation used in generating the Gaussian heatmap target multiplied by the
-    // 'std_dev_multiplier'.
+    // deviation used in generating the Gaussian heatmap target multiplied by
+    // the 'std_dev_multiplier'.
     optional float std_dev_multiplier = 29 [default = 1.0];

     // The radius (in the unit of output pixel) around heatmap peak to assign
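For reference, the rescoring formula quoted in that comment can be checked with a short NumPy sketch; the array values below are made up purely for illustration.

import numpy as np

# scores * exp((-distances^2) / (2 * sigma^2)), with sigma scaled by std_dev_multiplier.
heatmap_scores = np.array([0.9, 0.7, 0.5])  # peak scores for three candidate joints
distances = np.array([0.0, 2.0, 6.0])       # pixels between heatmap peak and regressed joint
sigma = 3.0                                 # Gaussian std dev of the heatmap target
std_dev_multiplier = 1.0                    # proto default

effective_sigma = sigma * std_dev_multiplier
rescored = heatmap_scores * np.exp(-(distances ** 2) / (2.0 * effective_sigma ** 2))
print(rescored)  # scores decay as the regressed joint drifts from the peak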
@@ -415,7 +415,7 @@ message CenterNet {
     optional int32 dim = 3 [default = 256];

     // The dimension of the per-pixel embedding
-    optional int32 pixel_embedding_dim = 4 [default = 16];
+    optional int32 pixel_embedding_dim = 4 [default = 16];

     // If set, masks are only kept for classes listed here. Masks are deleted
     // for all other classes. Note that this is only done at training time, eval
@@ -424,75 +424,86 @@ message CenterNet {
     // The size of cropped pixel embedding that goes into the 2D mask prediction
     // network (RoI align).
-    optional int32 mask_size = 6 [default = 32];
+    optional int32 mask_size = 6 [default = 32];

     // If set to a positive value, we subsample instances by this amount to
     // save memory during training.
-    optional int32 mask_num_subsamples = 67 [default = -1];
+    optional int32 mask_num_subsamples = 67 [default = -1];

     // Whether or not to use (x, y) coordinates as input to mask net.
-    optional bool use_xy = 8 [default = true];
+    optional bool use_xy = 8 [default = true];

     // Defines the kind of architecture we want to use for mask network.
-    optional string network_type = 9 [default = "hourglass52"];
+    optional string network_type = 9 [default = "hourglass52"];

     // Whether or not we want to use instance embedding in mask network.
-    optional bool use_instance_embedding = 10 [default = true];
+    optional bool use_instance_embedding = 10 [default = true];

     // Number of channels in the inital block of the mask prediction network.
-    optional int32 num_init_channels = 11 [default = 64];
+    optional int32 num_init_channels = 11 [default = 64];

     // Whether or not to predict masks at full resolution. If true, we predict
     // masks at the resolution of the output stride. Otherwise, masks are
     // predicted at resolution defined by mask_size
-    optional bool predict_full_resolution_masks = 12 [default = false];
+    optional bool predict_full_resolution_masks = 12 [default = false];

     // If predict_full_resolution_masks is set, this parameter controls the size
     // of cropped masks returned by post-process. To be compatible with the rest
     // of the API, masks are always cropped and resized according to detected
     // boxes in postprocess.
-    optional int32 postprocess_crop_size = 13 [default = 256];
+    optional int32 postprocess_crop_size = 13 [default = 256];

     // The maximum relative amount by which boxes will be jittered before
     // RoI crop happens. The x and y coordinates of the box are jittered
     // relative to width and height respectively.
-    optional float max_roi_jitter_ratio = 14 [default = 0.0];
+    optional float max_roi_jitter_ratio = 14 [default = 0.0];

     // The mode for jitterting box ROIs. See RandomJitterBoxes in
     // preprocessor.proto for more details
-    optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default = DEFAULT];
+    optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default = DEFAULT];

     // Weight for the box consistency loss as described in the BoxInst paper
     // https://arxiv.org/abs/2012.02310
-    optional float box_consistency_loss_weight = 16 [default = 0.0];
+    optional float box_consistency_loss_weight = 16 [default = 0.0];

-    optional float color_consistency_threshold = 17 [default = 0.4];
+    optional float color_consistency_threshold = 17 [default = 0.4];

-    optional int32 color_consistency_dilation = 18 [default = 2];
+    optional int32 color_consistency_dilation = 18 [default = 2];

-    optional float color_consistency_loss_weight = 19 [default = 0.0];
+    optional float color_consistency_loss_weight = 19 [default = 0.0];

-    optional LossNormalize box_consistency_loss_normalize = 20 [default = NORMALIZE_AUTO];
+    optional LossNormalize box_consistency_loss_normalize = 20 [default = NORMALIZE_AUTO];

     // If set, will use the bounding box tightness prior approach. This means
     // that the max will be restricted to only be inside the box for both
     // dimensions. See details here:
     // https://papers.nips.cc/paper/2019/hash/e6e713296627dff6475085cc6a224464-Abstract.html
-    optional bool box_consistency_tightness = 21 [default = false];
+    optional bool box_consistency_tightness = 21 [default = false];

-    optional int32 color_consistency_warmup_steps = 22 [default = 0];
+    optional int32 color_consistency_warmup_steps = 22 [default = 0];

-    optional int32 color_consistency_warmup_start = 23 [default = 0];
+    optional int32 color_consistency_warmup_start = 23 [default = 0];

     // This flag controls whether or not we use the outputs from only the
     // last stage of the hourglass for training the mask-heads.
     // DeepMAC has been refactored to process the entire batch at once,
     // instead of the previous (simple) approach of processing one sample at
-    // a time. Because of this, the memory consumption has increased and
-    // it's crucial to only feed the mask head the last stage outputs
-    // from the hourglass. Doing so halves the memory requirement of the
-    // mask head and does not cause a drop in evaluation metrics.
-    optional bool use_only_last_stage = 24 [default = false];
+    // a time. Because of this, we need to set this flag to continue using
+    // the old models with the same training hardware.
+    // This flag is not needed for 1024x1024 models. The performance and
+    // memory usage are same as before.
+    // For 512x512 models
+    // - Setting this flag to true will let the model train on TPU-v3 32
+    //   chips. We observed a small (0.26 mAP) performance drop when doing so.
+    // - Setting this flag to false (default) increases the TPU requirement
+    //   to TPU-v3 128 and reproduces previously demonstrated performance
+    //   within error bars.
+    optional bool use_only_last_stage = 24 [default = false];
   }

   optional DeepMACMaskEstimation deepmac_mask_estimation = 14;
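As a hedged sketch of how the newly documented flag surfaces in the generated Python bindings (assuming DeepMACMaskEstimation is nested inside CenterNet as the hunk context indicates, and that the protos have been compiled to center_net_pb2), the field can be set and read back from a text proto; the values are illustrative only.

from google.protobuf import text_format
from object_detection.protos import center_net_pb2

mask_estimation = text_format.Parse(
    """
    use_only_last_stage: true
    network_type: "hourglass52"
    """,
    center_net_pb2.CenterNet.DeepMACMaskEstimation())

# Default is false; the 512x512 config above sets it to true to fit on TPU-v3 32 chips.
print(mask_estimation.use_only_last_stage)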
@@ -506,7 +517,7 @@ message CenterNet {
   }

   enum LossNormalize {
-    NORMALIZE_AUTO = 0;  // SUM for 2D inputs (dice loss) and MEAN for others.
+    NORMALIZE_AUTO = 0;  // SUM for 2D inputs (dice loss) and MEAN for others.
     NORMALIZE_GROUNDTRUTH_COUNT = 1;
     NORMALIZE_BALANCED = 3;
   }
@@ -547,4 +558,3 @@ message CenterNetFeatureExtractor {
   optional string upsampling_interpolation = 11 [default = 'nearest'];
 }