Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
4f7d403d
"git@developer.sourcefind.cn:change/sglang.git" did not exist on "e321c9711306e35dc8cf905830ebe5b810ff1bc1"
Commit
4f7d403d
authored
Jul 16, 2020
by
Kaushik Shivakumar
Browse files
remove meta arch
parent
00eac920
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
293 deletions
+0
-293
research/object_detection/meta_architectures/detr_meta_arch.py
...rch/object_detection/meta_architectures/detr_meta_arch.py
+0
-293
No files found.
research/object_detection/meta_architectures/detr_meta_arch.py
deleted
100644 → 0
View file @
00eac920
import abc
import collections
import functools

import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow.compat.v2 as tf2

from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.matchers import hungarian_matcher
from object_detection.meta_architectures import detr_transformer
from object_detection.models import faster_rcnn_resnet_keras_feature_extractor
from object_detection.utils import ops
from object_detection.utils import shape_utils
class DETRMetaArch(model.DetectionModel):
  """DETR-style detection meta architecture (work in progress).

  The forward pass runs a ResNet-50 Keras backbone, flattens the resulting
  feature map, feeds it together with a set of learned object queries through
  a transformer, and applies two dense heads that emit box coordinates and
  class logits.

  NOTE(review): `loss` and `_loss_box_classifier` appear to have been carried
  over from the Faster R-CNN meta architecture. They read attributes and
  helpers that this class does not define (`max_num_proposals`, `_box_coder`,
  `_hard_example_miner`, `_second_stage_loc_loss_weight`,
  `_second_stage_cls_loss_weight`, `_format_groundtruth_data`,
  `_image_batch_shape_2d`, `_padded_batched_proposals_indicator`,
  `_get_refined_encodings_for_postitive_class`,
  `_second_stage_localization_loss`, `_second_stage_classification_loss`,
  `_unpad_proposals_and_apply_hard_mining`), and `preprocess` reads
  `_image_resizer_fn`. Those must be supplied before the loss path or
  preprocessing can run.
  """

  def __init__(self,
               num_classes=1,
               num_queries=100,
               hidden_dimension=100,
               is_training=False):
    """Builds the backbone, transformer, prediction heads and object queries.

    All arguments are optional; the defaults reproduce the values that were
    previously hard-coded.

    Args:
      num_classes: number of object classes, excluding background. The class
        head emits `num_classes + 1` logits.
      num_queries: number of learned object queries fed to the transformer.
      hidden_dimension: size of each object query vector.
      is_training: forwarded to the ResNet-50 feature extractor.
    """
    # The base class constructor must run before any attribute is assigned so
    # that its own state (e.g. the class count) is initialised.
    super(DETRMetaArch, self).__init__(num_classes=num_classes)

    self.num_queries = num_queries
    self.hidden_dimension = hidden_dimension

    self.feature_extractor = (
        faster_rcnn_resnet_keras_feature_extractor
        .FasterRCNNResnet50KerasFeatureExtractor(is_training=is_training))
    self.first_stage = (
        self.feature_extractor.get_proposal_feature_extractor_model())

    self.target_assigner = target_assigner.create_target_assigner(
        'DETR', 'detection')
    # `_loss_box_classifier` looks the assigner up under this name.
    self._detector_target_assigner = self.target_assigner

    self.transformer = detr_transformer.Transformer()
    self.ffn = (
        self.feature_extractor.get_box_classifier_feature_extractor_model())

    # Prediction heads: 4 box coordinates and one logit per class plus
    # background.
    self.bboxes = tf.keras.layers.Dense(4)
    self.cls = tf.keras.layers.Dense(num_classes + 1)

    # Learned object queries. The original called the non-existent
    # `tf.keras.Variable(tf.random(...))`; a normally distributed initial
    # value is used here.
    # NOTE(review): confirm the intended initial distribution.
    self.queries = tf.Variable(
        tf.random.normal([self.num_queries, self.hidden_dimension]))

  def predict(self, preprocessed_inputs, true_image_shapes, **side_inputs):
    """Runs backbone, transformer and prediction heads on a batch of images.

    Args:
      preprocessed_inputs: batch of images passed to the backbone.
      true_image_shapes: unused by this implementation.
      **side_inputs: unused by this implementation.

    Returns:
      A `(boxes, class_logits)` tuple: the outputs of the box head and of the
      class head applied to the same transformer features.
      NOTE(review): `loss` expects a prediction dictionary, not a tuple.
    """
    features = self.first_stage(preprocessed_inputs)

    # Use static dimensions where known and dynamic ones otherwise, so that an
    # unknown batch size or spatial extent does not break the reshape.
    batch_size, height, width, channels = (
        shape_utils.combined_static_and_dynamic_shape(features))
    flat_features = tf.reshape(features,
                               [batch_size, height * width, channels])

    # One copy of the learned queries per image in the batch.
    batched_queries = tf.tile(
        tf.expand_dims(self.queries, 0), [batch_size, 1, 1])

    decoded = self.transformer([flat_features, batched_queries])
    decoded = self.ffn(decoded)
    return self.bboxes(decoded), self.cls(decoded)

  def preprocess(self, inputs):
    """Resizes the input images and applies backbone-specific preprocessing.

    Args:
      inputs: a [batch, height_in, width_in, channels] float tensor
        representing a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: the resized images after
        `self.feature_extractor.preprocess`.
      true_image_shapes: the shapes returned by
        `shape_utils.resize_images_and_return_shapes` for the resized images.
    """
    # NOTE(review): `_image_resizer_fn` is not set anywhere in this class.
    with tf.name_scope('Preprocessor'):
      (resized_inputs,
       true_image_shapes) = shape_utils.resize_images_and_return_shapes(
           inputs, self._image_resizer_fn)

      return (self.feature_extractor.preprocess(resized_inputs),
              true_image_shapes)

  def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
    """Not supported yet.

    Args:
      fine_tune_checkpoint_type: unused.

    Raises:
      NotImplementedError: always.
    """
    raise NotImplementedError("Model restoration not implemented yet.")

  def restore_map(self,
                  fine_tune_checkpoint_type='detection',
                  load_all_detection_checkpoint_vars=False):
    """Not supported yet.

    Args:
      fine_tune_checkpoint_type: unused.
      load_all_detection_checkpoint_vars: unused.

    Raises:
      NotImplementedError: always.
    """
    raise NotImplementedError("Model restoration not implemented yet.")

  def loss(self, prediction_dict, true_image_shapes, scope=None):
    """Computes the box classifier losses from a prediction dictionary.

    Groundtruth is formatted via `self._format_groundtruth_data` and the
    actual loss computation is delegated to `_loss_box_classifier`.

    Args:
      prediction_dict: a dictionary holding prediction tensors. The keys
        `image_shape`, `refined_box_encodings`,
        `class_predictions_with_background`, `proposal_boxes` and
        `num_proposals` are required; `mask_predictions` and the
        `detection_boxes` / `num_detections` detection result fields are
        optional.
      true_image_shapes: unused by this implementation.
      scope: Optional scope name.

    Returns:
      a dictionary mapping the loss keys
      'Loss/BoxClassifierLoss/localization_loss' and
      'Loss/BoxClassifierLoss/classification_loss' to the corresponding
      loss tensors.
    """
    with tf.name_scope(scope, 'Loss', prediction_dict.values()):
      (groundtruth_boxlists, groundtruth_classes_with_background_list,
       groundtruth_masks_list, groundtruth_weights_list
      ) = self._format_groundtruth_data(
          self._image_batch_shape_2d(prediction_dict['image_shape']))

      loss_dict = self._loss_box_classifier(
          prediction_dict['refined_box_encodings'],
          prediction_dict['class_predictions_with_background'],
          prediction_dict['proposal_boxes'],
          prediction_dict['num_proposals'],
          groundtruth_boxlists,
          groundtruth_classes_with_background_list,
          groundtruth_weights_list,
          prediction_dict['image_shape'],
          prediction_dict.get('mask_predictions'),
          groundtruth_masks_list,
          prediction_dict.get(
              fields.DetectionResultFields.detection_boxes),
          prediction_dict.get(
              fields.DetectionResultFields.num_detections))
    return loss_dict

  def _loss_box_classifier(self,
                           refined_box_encodings,
                           class_predictions_with_background,
                           proposal_boxes,
                           num_proposals,
                           groundtruth_boxlists,
                           groundtruth_classes_with_background_list,
                           groundtruth_weights_list,
                           image_shape,
                           prediction_masks=None,
                           groundtruth_masks_list=None,
                           detection_boxes=None,
                           num_detections=None):
    """Computes scalar box classifier loss tensors.

    Uses self._detector_target_assigner to obtain regression and
    classification targets, optionally performs hard mining, and returns
    weighted localization and classification losses. Per-proposal losses are
    divided by (number of proposals in the image * batch size) and padded
    proposals are masked out before summation.

    Args:
      refined_box_encodings: a 3-D tensor with shape
        [total_num_proposals, num_classes, box_coder.code_size] representing
        predicted (final) refined box encodings. If using a shared box across
        classes this will instead have shape
        [total_num_proposals, 1, box_coder.code_size].
      class_predictions_with_background: a 2-D tensor with shape
        [total_num_proposals, num_classes + 1] containing class
        predictions (logits), *including* the background class at index 0.
      proposal_boxes: [batch_size, self.max_num_proposals, 4] representing
        decoded proposal bounding boxes.
      num_proposals: a 1-D `int32` tensor of shape [batch] representing the
        number of proposals predicted for each image in the batch.
      groundtruth_boxlists: a list of BoxLists containing coordinates of the
        groundtruth boxes.
      groundtruth_classes_with_background_list: a list of 2-D one-hot
        (or k-hot) tensors of shape [num_boxes, num_classes + 1] containing
        the class targets with the 0th index assumed to map to the
        background class.
      groundtruth_weights_list: a list of 1-D tf.float32 tensors of shape
        [num_boxes] containing weights for groundtruth boxes.
      image_shape: unused by this implementation.
      prediction_masks: unused by this implementation.
      groundtruth_masks_list: unused by this implementation.
      detection_boxes: unused by this implementation.
      num_detections: unused by this implementation.

    Returns:
      a dictionary mapping the loss keys
      'Loss/BoxClassifierLoss/localization_loss' and
      'Loss/BoxClassifierLoss/classification_loss' to scalar tensors
      representing the corresponding loss values.
    """
    with tf.name_scope('BoxClassifierLoss'):
      paddings_indicator = self._padded_batched_proposals_indicator(
          num_proposals, proposal_boxes.shape[1])
      proposal_boxlists = [
          box_list.BoxList(proposal_boxes_single_image)
          for proposal_boxes_single_image in tf.unstack(proposal_boxes)]
      batch_size = len(proposal_boxlists)

      # Clamp to at least one proposal so the division below is never by
      # zero.
      num_proposals_or_one = tf.cast(
          tf.expand_dims(
              tf.maximum(num_proposals, tf.ones_like(num_proposals)), 1),
          dtype=tf.float32)
      normalizer = tf.tile(num_proposals_or_one,
                           [1, self.max_num_proposals]) * batch_size

      (batch_cls_targets_with_background, batch_cls_weights,
       batch_reg_targets, batch_reg_weights,
       _) = target_assigner.batch_assign_targets(
           target_assigner=self._detector_target_assigner,
           anchors_batch=proposal_boxlists,
           gt_box_batch=groundtruth_boxlists,
           gt_class_targets_batch=groundtruth_classes_with_background_list,
           # Unmatched proposals are labelled as background (index 0).
           unmatched_class_label=tf.constant(
               [1] + self._num_classes * [0], dtype=tf.float32),
           gt_weights_batch=groundtruth_weights_list)

      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [batch_size, self.max_num_proposals, -1])

      flat_cls_targets_with_background = tf.reshape(
          batch_cls_targets_with_background,
          [batch_size * self.max_num_proposals, -1])
      # Collapse (possibly k-hot) targets to a single class per proposal.
      one_hot_flat_cls_targets_with_background = tf.argmax(
          flat_cls_targets_with_background, axis=1)
      one_hot_flat_cls_targets_with_background = tf.one_hot(
          one_hot_flat_cls_targets_with_background,
          flat_cls_targets_with_background.get_shape()[1])

      # If using a shared box across classes use directly
      if refined_box_encodings.shape[1] == 1:
        reshaped_refined_box_encodings = tf.reshape(
            refined_box_encodings,
            [batch_size, self.max_num_proposals, self._box_coder.code_size])
      # For anchors with multiple labels, picks refined_location_encodings
      # for just one class to avoid over-counting for regression loss and
      # (optionally) mask loss.
      else:
        reshaped_refined_box_encodings = (
            self._get_refined_encodings_for_postitive_class(
                refined_box_encodings,
                one_hot_flat_cls_targets_with_background, batch_size))

      losses_mask = None
      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
        losses_mask = tf.stack(self.groundtruth_lists(
            fields.InputDataFields.is_annotated))

      second_stage_loc_losses = self._second_stage_localization_loss(
          reshaped_refined_box_encodings,
          batch_reg_targets,
          weights=batch_reg_weights,
          losses_mask=losses_mask) / normalizer
      second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
          self._second_stage_classification_loss(
              class_predictions_with_background,
              batch_cls_targets_with_background,
              weights=batch_cls_weights,
              losses_mask=losses_mask),
          ndims=2) / normalizer

      # Mask out padded proposals before reducing to scalars.
      second_stage_loc_loss = tf.reduce_sum(
          second_stage_loc_losses * tf.cast(paddings_indicator,
                                            dtype=tf.float32))
      second_stage_cls_loss = tf.reduce_sum(
          second_stage_cls_losses * tf.cast(paddings_indicator,
                                            dtype=tf.float32))

      if self._hard_example_miner:
        (second_stage_loc_loss, second_stage_cls_loss
        ) = self._unpad_proposals_and_apply_hard_mining(
            proposal_boxlists, second_stage_loc_losses,
            second_stage_cls_losses, num_proposals)

      localization_loss = tf.multiply(self._second_stage_loc_loss_weight,
                                      second_stage_loc_loss,
                                      name='localization_loss')
      classification_loss = tf.multiply(self._second_stage_cls_loss_weight,
                                        second_stage_cls_loss,
                                        name='classification_loss')

      loss_dict = {
          'Loss/BoxClassifierLoss/localization_loss': localization_loss,
          'Loss/BoxClassifierLoss/classification_loss': classification_loss,
      }
    return loss_dict
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment