ModelZoo / ResNet50_tensorflow / Commits

Commit b92025a9, authored Aug 18, 2021 by anivegesana

    Merge branch 'master' of https://github.com/tensorflow/models
    into detection_generator_pr_2

Parents: 1b425791, 37536370
Changes: 108 changed files in total; showing the first 20, with 775 additions
and 326 deletions (+775, -326).
official/vision/beta/modeling/layers/detection_generator.py                                  +23  -32
official/vision/beta/modeling/layers/nn_layers.py                                            +8   -7
official/vision/beta/modeling/maskrcnn_model.py                                              +51  -4
official/vision/beta/ops/box_ops.py                                                          +70  -0
official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn.py                +92  -7
official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn_config_test.py    +4   -0
official/vision/beta/projects/deepmac_maskrcnn/modeling/maskrcnn_model.py                    +106 -142
official/vision/beta/projects/example/README.md                                              +214 -0
official/vision/beta/projects/movinet/README.md                                              +2   -2
official/vision/beta/projects/movinet/export_saved_model_test.py                             +2   -2
official/vision/beta/projects/movinet/modeling/movinet.py                                    +24  -21
official/vision/beta/projects/movinet/modeling/movinet_layers.py                             +2   -2
official/vision/beta/projects/movinet/modeling/movinet_test.py                               +10  -10
official/vision/beta/projects/simclr/common/registry_imports.py                              +0   -14
official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_multitask_tpu.yaml  +138 -0
official/vision/beta/projects/simclr/configs/multitask_config.py                             +8   -4
official/vision/beta/projects/simclr/configs/simclr.py                                       +20  -34
official/vision/beta/projects/simclr/configs/simclr_test.py                                  +1   -17
official/vision/beta/projects/simclr/dataloaders/preprocess_ops.py                           +0   -14
official/vision/beta/projects/simclr/dataloaders/simclr_input.py                             +0   -14
official/vision/beta/modeling/layers/detection_generator.py  (+23, -32)

@@ -514,22 +514,22 @@ class DetectionGenerator(tf.keras.layers.Layer):
     }
     if self._config_dict['use_batched_nms']:
-      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
+      (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
           _generate_detections_batched(
               decoded_boxes,
               box_scores,
               self._config_dict['pre_nms_score_threshold'],
               self._config_dict['nms_iou_threshold'],
               self._config_dict['max_num_detections']))
     else:
-      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
-          _generate_detections_v2(
+      (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections, _) = (
+          _generate_detections_v1(
               decoded_boxes,
               box_scores,
-              self._config_dict['pre_nms_top_k'],
-              self._config_dict['pre_nms_score_threshold'],
-              self._config_dict['nms_iou_threshold'],
-              self._config_dict['max_num_detections']))
+              pre_nms_top_k=self._config_dict['pre_nms_top_k'],
+              pre_nms_score_threshold=self._config_dict[
+                  'pre_nms_score_threshold'],
+              nms_iou_threshold=self._config_dict['nms_iou_threshold'],
+              max_num_detections=self._config_dict['max_num_detections']))
     # Adds 1 to offset the background class which has index 0.
     nmsed_classes += 1

@@ -714,18 +714,16 @@ class MultilevelDetectionGenerator(tf.keras.layers.Layer):
       if raw_attributes:
         raise ValueError(
             'Attribute learning is not supported for batched NMS.')
-      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
+      (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
           _generate_detections_batched(
               boxes,
               scores,
               self._config_dict['pre_nms_score_threshold'],
               self._config_dict['nms_iou_threshold'],
               self._config_dict['max_num_detections']))
       # Set `nmsed_attributes` to None for batched NMS.
       nmsed_attributes = {}
     else:
-      if raw_attributes:
-        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections, nmsed_attributes = (
-            _generate_detections_v1(
-                boxes,
-                scores,
+      (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections,
+       nmsed_attributes) = (
+           _generate_detections_v1(
+               boxes,
+               scores,

@@ -735,14 +733,7 @@ class MultilevelDetectionGenerator(tf.keras.layers.Layer):
-                self._config_dict['pre_nms_score_threshold'],
-                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
-                max_num_detections=self._config_dict['max_num_detections']))
-      else:
-        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
-            _generate_detections_v2(
-                boxes,
-                scores,
-                self._config_dict['pre_nms_top_k'],
-                self._config_dict['pre_nms_score_threshold'],
-                self._config_dict['nms_iou_threshold'],
-                self._config_dict['max_num_detections']))
-        nmsed_attributes = {}
+               pre_nms_score_threshold=self._config_dict[
+                   'pre_nms_score_threshold'],
+               nms_iou_threshold=self._config_dict['nms_iou_threshold'],
+               max_num_detections=self._config_dict['max_num_detections']))
     # Adds 1 to offset the background class which has index 0.
     nmsed_classes += 1
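The move above from positional calls to keyword arguments also guards against
silently swapping the scalar NMS parameters. A minimal sketch of the idea,
with a hypothetical signature (not this module's actual private API):

```python
# Hypothetical sketch: keyword-only parameters make it impossible to pass the
# four scalar NMS knobs in the wrong order.
def generate_detections(boxes, scores, *,
                        pre_nms_top_k=5000,
                        pre_nms_score_threshold=0.05,
                        nms_iou_threshold=0.5,
                        max_num_detections=100):
  ...

# generate_detections(b, s, 0.5, 0.05)             -> TypeError, cannot mis-order
# generate_detections(b, s, nms_iou_threshold=0.5)  # explicit and safe
```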
official/vision/beta/modeling/layers/nn_layers.py  (+8, -7)

@@ -165,7 +165,8 @@ class SqueezeExcitation(tf.keras.layers.Layer):
   def build(self, input_shape):
     num_reduced_filters = make_divisible(
-        self._in_filters * self._se_ratio, divisor=self._divisible_by)
+        max(1, int(self._in_filters * self._se_ratio)),
+        divisor=self._divisible_by)
     self._se_reduce = tf.keras.layers.Conv2D(
         filters=num_reduced_filters,

@@ -424,7 +425,7 @@ class PositionalEncoding(tf.keras.layers.Layer):
     self._rezero = Scale(initializer=initializer, name='rezero')
     state_prefix = state_prefix if state_prefix is not None else ''
     self._state_prefix = state_prefix
-    self._frame_count_name = f'{state_prefix}/pos_enc_frame_count'
+    self._frame_count_name = f'{state_prefix}_pos_enc_frame_count'

   def get_config(self):
     """Returns a dictionary containing the config used for initialization."""

@@ -522,7 +523,7 @@ class PositionalEncoding(tf.keras.layers.Layer):
       inputs: An input `tf.Tensor`.
       states: A `dict` of states such that, if any of the keys match for this
         layer, will overwrite the contents of the buffer(s). Expected keys
-        include `state_prefix + '/pos_enc_frame_count'`.
+        include `state_prefix + '_pos_enc_frame_count'`.
       output_states: A `bool`. If True, returns the output tensor and output
         states. Returns just the output tensor otherwise.

@@ -586,8 +587,8 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
     state_prefix = state_prefix if state_prefix is not None else ''
     self._state_prefix = state_prefix
-    self._state_name = f'{state_prefix}/pool_buffer'
-    self._frame_count_name = f'{state_prefix}/pool_frame_count'
+    self._state_name = f'{state_prefix}_pool_buffer'
+    self._frame_count_name = f'{state_prefix}_pool_frame_count'

   def get_config(self):
     """Returns a dictionary containing the config used for initialization."""

@@ -610,8 +611,8 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
       inputs: An input `tf.Tensor`.
       states: A `dict` of states such that, if any of the keys match for this
         layer, will overwrite the contents of the buffer(s).
-        Expected keys include `state_prefix + '/pool_buffer'` and
-        `state_prefix + '/pool_frame_count'`.
+        Expected keys include `state_prefix + '_pool_buffer'` and
+        `state_prefix + '_pool_frame_count'`.
       output_states: A `bool`. If True, returns the output tensor and output
         states. Returns just the output tensor otherwise.
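The `max(1, int(...))` wrapper in the first hunk ensures the value handed to
`make_divisible` is an integer of at least 1 rather than a small float. A
sketch of the effect, assuming the usual MobileNet-style rounding helper (the
helper in this file may differ in detail):

```python
def make_divisible(value, divisor=8, min_value=None):
  """MobileNet-style rounding to the nearest multiple of `divisor` (sketch)."""
  if min_value is None:
    min_value = divisor
  new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
  # Make sure that rounding down does not drop below 90% of the input.
  if new_value < 0.9 * value:
    new_value += divisor
  return new_value

# With in_filters=24 and se_ratio=0.03 the raw product is 0.72; the new code
# passes max(1, int(0.72)) == 1 into make_divisible instead of the float 0.72,
# so the reduced filter count is always computed from an integer >= 1.
```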
official/vision/beta/modeling/maskrcnn_model.py  (+51, -4)

@@ -14,7 +14,7 @@
 """Mask R-CNN model."""

-from typing import Any, List, Mapping, Optional, Union
+from typing import Any, List, Mapping, Optional, Tuple, Union

 import tensorflow as tf

@@ -143,6 +143,34 @@ class MaskRCNNModel(tf.keras.Model):
            gt_classes: Optional[tf.Tensor] = None,
            gt_masks: Optional[tf.Tensor] = None,
            training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
+    model_outputs, intermediate_outputs = self._call_box_outputs(
+        images=images, image_shape=image_shape, anchor_boxes=anchor_boxes,
+        gt_boxes=gt_boxes, gt_classes=gt_classes, training=training)
+    if not self._include_mask:
+      return model_outputs
+
+    model_mask_outputs = self._call_mask_outputs(
+        model_box_outputs=model_outputs,
+        features=intermediate_outputs['features'],
+        current_rois=intermediate_outputs['current_rois'],
+        matched_gt_indices=intermediate_outputs['matched_gt_indices'],
+        matched_gt_boxes=intermediate_outputs['matched_gt_boxes'],
+        matched_gt_classes=intermediate_outputs['matched_gt_classes'],
+        gt_masks=gt_masks,
+        training=training)
+    model_outputs.update(model_mask_outputs)
+    return model_outputs
+
+  def _call_box_outputs(
+      self,
+      images: tf.Tensor,
+      image_shape: tf.Tensor,
+      anchor_boxes: Optional[Mapping[str, tf.Tensor]] = None,
+      gt_boxes: Optional[tf.Tensor] = None,
+      gt_classes: Optional[tf.Tensor] = None,
+      training: Optional[bool] = None
+  ) -> Tuple[Mapping[str, tf.Tensor], Mapping[str, tf.Tensor]]:
+    """Implementation of the Faster-RCNN logic for boxes."""
     model_outputs = {}

     # Feature extraction.

@@ -239,9 +267,28 @@ class MaskRCNNModel(tf.keras.Model):
         'decoded_box_scores': detections['decoded_box_scores']
     })
-    if not self._include_mask:
-      return model_outputs
+    intermediate_outputs = {
+        'matched_gt_boxes': matched_gt_boxes,
+        'matched_gt_indices': matched_gt_indices,
+        'matched_gt_classes': matched_gt_classes,
+        'features': features,
+        'current_rois': current_rois,
+    }
+    return (model_outputs, intermediate_outputs)
+
+  def _call_mask_outputs(
+      self,
+      model_box_outputs: Mapping[str, tf.Tensor],
+      features: tf.Tensor,
+      current_rois: tf.Tensor,
+      matched_gt_indices: tf.Tensor,
+      matched_gt_boxes: tf.Tensor,
+      matched_gt_classes: tf.Tensor,
+      gt_masks: tf.Tensor,
+      training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
+    """Implementation of Mask-RCNN mask prediction logic."""
+    model_outputs = dict(model_box_outputs)
     if training:
       current_rois, roi_classes, roi_masks = self.mask_sampler(
           current_rois, matched_gt_boxes, matched_gt_classes,
official/vision/beta/ops/box_ops.py  (+70, -0)

@@ -624,6 +624,76 @@ def bbox_overlap(boxes, gt_boxes):
   return iou


+def bbox_generalized_overlap(boxes, gt_boxes):
+  """Calculates the GIOU between proposal and ground truth boxes.
+
+  The generalized intersection of union is an adjustment of the traditional
+  IOU metric which provides continuous updates even for predictions with no
+  overlap. This metric is defined in https://giou.stanford.edu/GIoU.pdf. Note,
+  some `gt_boxes` may have been padded. The returned `giou` tensor for these
+  boxes will be -1.
+
+  Args:
+    boxes: a `Tensor` with a shape of [batch_size, N, 4]. N is the number of
+      proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
+      last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax] form.
+    gt_boxes: a `Tensor` with a shape of [batch_size, max_num_instances, 4].
+      This tensor may have paddings with a negative value and will also be in
+      the [ymin, xmin, ymax, xmax] format.
+
+  Returns:
+    giou: a `Tensor` with a shape of [batch_size, N, max_num_instances].
+  """
+  with tf.name_scope('bbox_generalized_overlap'):
+    assert boxes.shape.as_list()[-1] == 4, (
+        'Boxes must be defined by 4 coordinates.')
+    assert gt_boxes.shape.as_list()[-1] == 4, (
+        'Groundtruth boxes must be defined by 4 coordinates.')
+    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
+        value=boxes, num_or_size_splits=4, axis=2)
+    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
+        value=gt_boxes, num_or_size_splits=4, axis=2)
+
+    # Calculates the hull area for each pair of boxes, with one from `boxes`
+    # and the other from `gt_boxes`. Outputs for coordinates are of shape
+    # [batch_size, N, max_num_instances].
+    h_xmin = tf.minimum(bb_x_min, tf.transpose(gt_x_min, [0, 2, 1]))
+    h_xmax = tf.maximum(bb_x_max, tf.transpose(gt_x_max, [0, 2, 1]))
+    h_ymin = tf.minimum(bb_y_min, tf.transpose(gt_y_min, [0, 2, 1]))
+    h_ymax = tf.maximum(bb_y_max, tf.transpose(gt_y_max, [0, 2, 1]))
+    h_area = tf.maximum((h_xmax - h_xmin), 0) * tf.maximum((h_ymax - h_ymin), 0)
+    # Add a small epsilon to avoid divide-by-zero.
+    h_area = h_area + 1e-8
+
+    # Calculates the intersection area.
+    i_xmin = tf.maximum(bb_x_min, tf.transpose(gt_x_min, [0, 2, 1]))
+    i_xmax = tf.minimum(bb_x_max, tf.transpose(gt_x_max, [0, 2, 1]))
+    i_ymin = tf.maximum(bb_y_min, tf.transpose(gt_y_min, [0, 2, 1]))
+    i_ymax = tf.minimum(bb_y_max, tf.transpose(gt_y_max, [0, 2, 1]))
+    i_area = tf.maximum((i_xmax - i_xmin), 0) * tf.maximum((i_ymax - i_ymin), 0)
+
+    # Calculates the union area.
+    bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
+    gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)
+    # Adds a small epsilon to avoid divide-by-zero.
+    u_area = bb_area + tf.transpose(gt_area, [0, 2, 1]) - i_area + 1e-8
+
+    # Calculates IoU.
+    iou = i_area / u_area
+    # Calculates GIoU.
+    giou = iou - (h_area - u_area) / h_area
+    # Fills -1 for GIoU entries between the padded ground truth boxes.
+    gt_invalid_mask = tf.less(
+        tf.reduce_max(gt_boxes, axis=-1, keepdims=True), 0.0)
+    padding_mask = tf.broadcast_to(
+        tf.transpose(gt_invalid_mask, [0, 2, 1]), tf.shape(giou))
+    giou = tf.where(padding_mask, -tf.ones_like(giou), giou)
+  return giou
+
+
 def box_matching(boxes, gt_boxes, gt_classes):
   """Match boxes to groundtruth boxes.
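In the notation of the new function, GIoU = IoU - (H - U) / H, where H is the
area of the smallest enclosing (hull) box and U the union area. A small sanity
check, assuming eager TF 2.x and that the module is importable as shown:

```python
import tensorflow as tf
from official.vision.beta.ops import box_ops

# One proposal and one disjoint ground truth box, in [ymin, xmin, ymax, xmax].
boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0]]])      # area 1
gt_boxes = tf.constant([[[0.0, 2.0, 1.0, 3.0]]])   # area 1, no overlap

giou = box_ops.bbox_generalized_overlap(boxes, gt_boxes)
# Hull box is [0, 0, 1, 3] -> H = 3; union U = 2; intersection I = 0.
# GIoU = 0 - (3 - 2) / 3 = -1/3, whereas plain IoU is flat at 0 for every
# non-overlapping pair, giving no gradient signal.
print(giou.numpy())  # approximately [[[-0.3333]]]
```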
official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn.py  (+92, -7)

@@ -22,6 +22,9 @@ import dataclasses
 from official.core import config_definitions as cfg
 from official.core import exp_factory
 from official.modeling import optimization
+from official.vision.beta.configs import backbones
+from official.vision.beta.configs import common
+from official.vision.beta.configs import decoders
 from official.vision.beta.configs import maskrcnn as maskrcnn_config
 from official.vision.beta.configs import retinanet as retinanet_config

@@ -59,20 +62,18 @@ def deep_mask_head_rcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
(this hunk only re-wraps the lines below; content is unchanged)
           annotation_file=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                        'instances_val2017.json'),
           model=DeepMaskHeadRCNN(
               num_classes=91, input_size=[1024, 1024, 3],
               include_mask=True),  # pytype: disable=wrong-keyword-args
           losses=maskrcnn_config.Losses(l2_weight_decay=0.00004),
           train_data=maskrcnn_config.DataConfig(
               input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                       'train*'),
               is_training=True,
               global_batch_size=global_batch_size,
               parser=maskrcnn_config.Parser(
                   aug_rand_hflip=True, aug_scale_min=0.8,
                   aug_scale_max=1.25)),
           validation_data=maskrcnn_config.DataConfig(
               input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                       'val*'),
               is_training=False,
               global_batch_size=8)),  # pytype: disable=wrong-keyword-args
       trainer=cfg.TrainerConfig(

@@ -110,3 +111,87 @@ def deep_mask_head_rcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
   ])
   return config
+
+
+@exp_factory.register_config_factory('deep_mask_head_rcnn_spinenet_coco')
+def deep_mask_head_rcnn_spinenet_coco() -> cfg.ExperimentConfig:
+  """COCO object detection with Mask R-CNN with SpineNet backbone."""
+  steps_per_epoch = 463
+  coco_val_samples = 5000
+  train_batch_size = 256
+  eval_batch_size = 8
+
+  config = cfg.ExperimentConfig(
+      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
+      task=DeepMaskHeadRCNNTask(
+          annotation_file=os.path.join(
+              maskrcnn_config.COCO_INPUT_PATH_BASE,
+              'instances_val2017.json'),  # pytype: disable=wrong-keyword-args
+          model=DeepMaskHeadRCNN(
+              backbone=backbones.Backbone(
+                  type='spinenet',
+                  spinenet=backbones.SpineNet(
+                      model_id='49',
+                      min_level=3,
+                      max_level=7,
+                  )),
+              decoder=decoders.Decoder(
+                  type='identity', identity=decoders.Identity()),
+              anchor=maskrcnn_config.Anchor(anchor_size=3),
+              norm_activation=common.NormActivation(use_sync_bn=True),
+              num_classes=91,
+              input_size=[640, 640, 3],
+              min_level=3,
+              max_level=7,
+              include_mask=True),  # pytype: disable=wrong-keyword-args
+          losses=maskrcnn_config.Losses(l2_weight_decay=0.00004),
+          train_data=maskrcnn_config.DataConfig(
+              input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
+                                      'train*'),
+              is_training=True,
+              global_batch_size=train_batch_size,
+              parser=maskrcnn_config.Parser(
+                  aug_rand_hflip=True, aug_scale_min=0.5,
+                  aug_scale_max=2.0)),
+          validation_data=maskrcnn_config.DataConfig(
+              input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
+                                      'val*'),
+              is_training=False,
+              global_batch_size=eval_batch_size,
+              drop_remainder=False)),  # pytype: disable=wrong-keyword-args
+      trainer=cfg.TrainerConfig(
+          train_steps=steps_per_epoch * 350,
+          validation_steps=coco_val_samples // eval_batch_size,
+          validation_interval=steps_per_epoch,
+          steps_per_loop=steps_per_epoch,
+          summary_interval=steps_per_epoch,
+          checkpoint_interval=steps_per_epoch,
+          optimizer_config=optimization.OptimizationConfig({
+              'optimizer': {
+                  'type': 'sgd',
+                  'sgd': {'momentum': 0.9}
+              },
+              'learning_rate': {
+                  'type': 'stepwise',
+                  'stepwise': {
+                      'boundaries': [steps_per_epoch * 320,
+                                     steps_per_epoch * 340],
+                      'values': [0.32, 0.032, 0.0032],
+                  }
+              },
+              'warmup': {
+                  'type': 'linear',
+                  'linear': {'warmup_steps': 2000,
+                             'warmup_learning_rate': 0.0067}
+              }
+          })),
+      restrictions=[
+          'task.train_data.is_training != None',
+          'task.validation_data.is_training != None',
+          'task.model.min_level == task.model.backbone.spinenet.min_level',
+          'task.model.max_level == task.model.backbone.spinenet.max_level',
+      ])
+  return config
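A minimal sketch of pulling the newly registered experiment out of the
factory; this assumes the module above (or a registry-imports file that
includes it) has been imported so the decorator has run:

```python
from official.core import exp_factory
# Assumes deep_mask_head_rcnn has been imported, which registers the factory.

config = exp_factory.get_exp_config('deep_mask_head_rcnn_spinenet_coco')
print(config.task.model.backbone.type)  # 'spinenet'
print(config.trainer.train_steps)       # 463 * 350 = 162050
```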
official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn_config_test.py  (+4, -0)

@@ -25,6 +25,10 @@ class DeepMaskHeadRcnnConfigTest(tf.test.TestCase):
     config = deep_mask_head_rcnn.deep_mask_head_rcnn_resnetfpn_coco()
     self.assertIsInstance(config.task, deep_mask_head_rcnn.DeepMaskHeadRCNNTask)

+  def test_config_spinenet(self):
+    config = deep_mask_head_rcnn.deep_mask_head_rcnn_spinenet_coco()
+    self.assertIsInstance(config.task,
+                          deep_mask_head_rcnn.DeepMaskHeadRCNNTask)
+

 if __name__ == '__main__':
   tf.test.main()
official/vision/beta/projects/deepmac_maskrcnn/modeling/maskrcnn_model.py  (+106, -142)

@@ -14,12 +14,14 @@
 """Mask R-CNN model."""

+from typing import List, Mapping, Optional, Union
+
 # Import libraries

 from absl import logging
 import tensorflow as tf

-from official.vision.beta.ops import box_ops
+from official.vision.beta.modeling import maskrcnn_model


 def resize_as(source, size):

@@ -30,21 +32,30 @@ def resize_as(source, size):
 @tf.keras.utils.register_keras_serializable(package='Vision')
-class DeepMaskRCNNModel(tf.keras.Model):
+class DeepMaskRCNNModel(maskrcnn_model.MaskRCNNModel):
   """The Mask R-CNN model."""

   def __init__(self,
-               backbone,
-               decoder,
-               rpn_head,
-               detection_head,
-               roi_generator,
-               roi_sampler,
-               roi_aligner,
-               detection_generator,
-               mask_head=None,
-               mask_sampler=None,
-               mask_roi_aligner=None,
+               backbone: tf.keras.Model,
+               decoder: tf.keras.Model,
+               rpn_head: tf.keras.layers.Layer,
+               detection_head: Union[tf.keras.layers.Layer,
+                                     List[tf.keras.layers.Layer]],
+               roi_generator: tf.keras.layers.Layer,
+               roi_sampler: Union[tf.keras.layers.Layer,
+                                  List[tf.keras.layers.Layer]],
+               roi_aligner: tf.keras.layers.Layer,
+               detection_generator: tf.keras.layers.Layer,
+               mask_head: Optional[tf.keras.layers.Layer] = None,
+               mask_sampler: Optional[tf.keras.layers.Layer] = None,
+               mask_roi_aligner: Optional[tf.keras.layers.Layer] = None,
+               class_agnostic_bbox_pred: bool = False,
+               cascade_class_ensemble: bool = False,
+               min_level: Optional[int] = None,
+               max_level: Optional[int] = None,
+               num_scales: Optional[int] = None,
+               aspect_ratios: Optional[List[float]] = None,
+               anchor_size: Optional[float] = None,
                use_gt_boxes_for_masks=False,
                **kwargs):
     """Initializes the Mask R-CNN model.

@@ -53,122 +64,99 @@ class DeepMaskRCNNModel(tf.keras.Model):
       backbone: `tf.keras.Model`, the backbone network.
       decoder: `tf.keras.Model`, the decoder network.
       rpn_head: the RPN head.
-      detection_head: the detection head.
+      detection_head: the detection head or a list of heads.
       roi_generator: the ROI generator.
-      roi_sampler: the ROI sampler.
+      roi_sampler: a single ROI sampler or a list of ROI samplers for cascade
+        detection heads.
       roi_aligner: the ROI aligner.
       detection_generator: the detection generator.
       mask_head: the mask head.
       mask_sampler: the mask sampler.
       mask_roi_aligner: the ROI aligner for mask prediction.
-      use_gt_boxes_for_masks: bool, if set, crop using groundtruth boxes
-        instead of proposals for training mask head
+      class_agnostic_bbox_pred: if True, perform class agnostic bounding box
+        prediction. Needs to be `True` for Cascade RCNN models.
+      cascade_class_ensemble: if True, ensemble classification scores over all
+        detection heads.
+      min_level: Minimum level in output feature maps.
+      max_level: Maximum level in output feature maps.
+      num_scales: A number representing intermediate scales added on each
+        level. For instance, num_scales=2 adds one additional intermediate
+        anchor scale [2^0, 2^0.5] on each level.
+      aspect_ratios: A list representing the aspect ratio anchors added on
+        each level. The number indicates the ratio of width to height. For
+        instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
+        scale level.
+      anchor_size: A number representing the scale of size of the base anchor
+        to the feature stride 2^level.
+      use_gt_boxes_for_masks: bool, if set, crop using groundtruth boxes
+        instead of proposals for training mask head
       **kwargs: keyword arguments to be passed.
     """
-    super(DeepMaskRCNNModel, self).__init__(**kwargs)
-    self._config_dict = {
-        'backbone': backbone,
-        'decoder': decoder,
-        'rpn_head': rpn_head,
-        'detection_head': detection_head,
-        'roi_generator': roi_generator,
-        'roi_sampler': roi_sampler,
-        'roi_aligner': roi_aligner,
-        'detection_generator': detection_generator,
-        'mask_head': mask_head,
-        'mask_sampler': mask_sampler,
-        'mask_roi_aligner': mask_roi_aligner,
-        'use_gt_boxes_for_masks': use_gt_boxes_for_masks
-    }
-    self.backbone = backbone
-    self.decoder = decoder
-    self.rpn_head = rpn_head
-    self.detection_head = detection_head
-    self.roi_generator = roi_generator
-    self.roi_sampler = roi_sampler
-    self.roi_aligner = roi_aligner
-    self.detection_generator = detection_generator
-    self._include_mask = mask_head is not None
-    self.mask_head = mask_head
-    if self._include_mask and mask_sampler is None:
-      raise ValueError('`mask_sampler` is not provided in Mask R-CNN.')
-    self.mask_sampler = mask_sampler
-    if self._include_mask and mask_roi_aligner is None:
-      raise ValueError('`mask_roi_aligner` is not provided in Mask R-CNN.')
-    self.mask_roi_aligner = mask_roi_aligner
+    super(DeepMaskRCNNModel, self).__init__(
+        backbone=backbone,
+        decoder=decoder,
+        rpn_head=rpn_head,
+        detection_head=detection_head,
+        roi_generator=roi_generator,
+        roi_sampler=roi_sampler,
+        roi_aligner=roi_aligner,
+        detection_generator=detection_generator,
+        mask_head=mask_head,
+        mask_sampler=mask_sampler,
+        mask_roi_aligner=mask_roi_aligner,
+        class_agnostic_bbox_pred=class_agnostic_bbox_pred,
+        cascade_class_ensemble=cascade_class_ensemble,
+        min_level=min_level,
+        max_level=max_level,
+        num_scales=num_scales,
+        aspect_ratios=aspect_ratios,
+        anchor_size=anchor_size,
+        **kwargs)
+
+    self._config_dict['use_gt_boxes_for_masks'] = use_gt_boxes_for_masks

   def call(self,
-           images,
-           image_shape,
-           anchor_boxes=None,
-           gt_boxes=None,
-           gt_classes=None,
-           gt_masks=None,
-           training=None):
-    model_outputs = {}
-
-    # Feature extraction.
-    features = self.backbone(images)
-    if self.decoder:
-      features = self.decoder(features)
-
-    # Region proposal network.
-    rpn_scores, rpn_boxes = self.rpn_head(features)
-    model_outputs.update({
-        'rpn_boxes': rpn_boxes,
-        'rpn_scores': rpn_scores
-    })
-
-    # Generate RoIs.
-    rois, _ = self.roi_generator(rpn_boxes, rpn_scores, anchor_boxes,
-                                 image_shape, training)
-
-    if training:
-      rois = tf.stop_gradient(rois)
-      rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices = (
-          self.roi_sampler(rois, gt_boxes, gt_classes))
-
-      # Assign target for the 2nd stage classification.
-      box_targets = box_ops.encode_boxes(
-          matched_gt_boxes, rois, weights=[10.0, 10.0, 5.0, 5.0])
-      # If the target is background, the box target is set to all 0s.
-      box_targets = tf.where(
-          tf.tile(
-              tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
-              [1, 1, 4]),
-          tf.zeros_like(box_targets),
-          box_targets)
-      model_outputs.update({
-          'class_targets': matched_gt_classes,
-          'box_targets': box_targets,
-      })
-
-    # RoI align.
-    roi_features = self.roi_aligner(features, rois)
-
-    # Detection head.
-    raw_scores, raw_boxes = self.detection_head(roi_features)
-    if training:
-      model_outputs.update({
-          'class_outputs': raw_scores,
-          'box_outputs': raw_boxes,
-      })
-    else:
-      # Post-processing.
-      detections = self.detection_generator(raw_boxes, raw_scores, rois,
-                                            image_shape)
-      model_outputs.update({
-          'detection_boxes': detections['detection_boxes'],
-          'detection_scores': detections['detection_scores'],
-          'detection_classes': detections['detection_classes'],
-          'num_detections': detections['num_detections'],
-      })
-
+           images: tf.Tensor,
+           image_shape: tf.Tensor,
+           anchor_boxes: Optional[Mapping[str, tf.Tensor]] = None,
+           gt_boxes: Optional[tf.Tensor] = None,
+           gt_classes: Optional[tf.Tensor] = None,
+           gt_masks: Optional[tf.Tensor] = None,
+           training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
+    model_outputs, intermediate_outputs = self._call_box_outputs(
+        images=images, image_shape=image_shape, anchor_boxes=anchor_boxes,
+        gt_boxes=gt_boxes, gt_classes=gt_classes, training=training)
     if not self._include_mask:
       return model_outputs
+
+    model_mask_outputs = self._call_mask_outputs(
+        model_box_outputs=model_outputs,
+        features=intermediate_outputs['features'],
+        current_rois=intermediate_outputs['current_rois'],
+        matched_gt_indices=intermediate_outputs['matched_gt_indices'],
+        matched_gt_boxes=intermediate_outputs['matched_gt_boxes'],
+        matched_gt_classes=intermediate_outputs['matched_gt_classes'],
+        gt_masks=gt_masks,
+        gt_classes=gt_classes,
+        gt_boxes=gt_boxes,
+        training=training)
+    model_outputs.update(model_mask_outputs)
+    return model_outputs
+
+  def _call_mask_outputs(
+      self,
+      model_box_outputs: Mapping[str, tf.Tensor],
+      features: tf.Tensor,
+      current_rois: tf.Tensor,
+      matched_gt_indices: tf.Tensor,
+      matched_gt_boxes: tf.Tensor,
+      matched_gt_classes: tf.Tensor,
+      gt_masks: tf.Tensor,
+      gt_classes: tf.Tensor,
+      gt_boxes: tf.Tensor,
+      training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
+    model_outputs = dict(model_box_outputs)
     if training:
       if self._config_dict['use_gt_boxes_for_masks']:
         mask_size = (

@@ -184,11 +172,8 @@ class DeepMaskRCNNModel(tf.keras.Model):
         })
       else:
         rois, roi_classes, roi_masks = self.mask_sampler(
-            rois, matched_gt_boxes, matched_gt_classes,
+            current_rois, matched_gt_boxes, matched_gt_classes,
             matched_gt_indices, gt_masks)
         roi_masks = tf.stop_gradient(roi_masks)

       model_outputs.update({
           'mask_class_targets': roi_classes,

@@ -219,24 +204,3 @@ class DeepMaskRCNNModel(tf.keras.Model):
         'detection_masks': tf.math.sigmoid(raw_masks),
     })
     return model_outputs
-
-  @property
-  def checkpoint_items(self):
-    """Returns a dictionary of items to be additionally checkpointed."""
-    items = dict(
-        backbone=self.backbone,
-        rpn_head=self.rpn_head,
-        detection_head=self.detection_head)
-    if self.decoder is not None:
-      items.update(decoder=self.decoder)
-    if self._include_mask:
-      items.update(mask_head=self.mask_head)
-    return items
-
-  def get_config(self):
-    return self._config_dict
-
-  @classmethod
-  def from_config(cls, config):
-    return cls(**config)
official/vision/beta/projects/example/README.md  0 → 100644  (+214, -0)
# TF Vision Example Project

This is a minimal example project that demonstrates how to use TF Model
Garden's building blocks to implement a new vision project from scratch.

Below we use classification as an example and walk you through the process of
creating a new project that leverages existing components, such as tasks, data
loaders, and models. Going through the process will give you a better
understanding of these components; you can also refer to the docstrings of the
corresponding components for more information.
## Create Model

In [example_model.py](example_model.py), we show how to create a new model.
`ExampleModel` is a subclass of `tf.keras.Model` that defines the necessary
parameters. You need `input_specs` to specify the input shape and dimensions,
and you build the layers within the constructor:

```python
class ExampleModel(tf.keras.Model):

  def __init__(
      self,
      num_classes: int,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, None, None, 3]),
      **kwargs):
    # Build layers.
```

Given the `ExampleModel`, you can define a function that takes a model config
as input and returns an `ExampleModel` instance, similar to
[build_example_model](example_model.py#L80); a sketch follows below.

As a simple example, we define a single model. However, you can split the
model implementation into individual components, such as backbones, decoders,
and heads, as we do
[here](https://github.com/tensorflow/models/blob/master/official/vision/beta/modeling).
Then, in the `build_example_model` function, you can hook these components up
to obtain your full model.
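A hedged sketch of such a builder; the config field name `num_classes` is an
assumption for illustration rather than the file's literal contents:

```python
def build_example_model(input_specs: tf.keras.layers.InputSpec,
                        model_config) -> tf.keras.Model:
  """Builds an ExampleModel from a model config object (sketch)."""
  return ExampleModel(
      num_classes=model_config.num_classes,  # assumed config field
      input_specs=input_specs)
```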
## Create Dataloader

A dataloader reads, decodes, and parses the input data. We have created
various
[dataloaders](https://github.com/tensorflow/models/blob/master/official/vision/beta/dataloaders)
to handle standard input formats for classification, detection, and
segmentation. If you have non-standard or complex data, you may want to create
your own dataloader. It contains a `Decoder` and a `Parser`.

- The [Decoder](example_input.py#L33) decodes a TF Example record and returns
  a dictionary of decoded tensors:

  ```python
  class Decoder(decoder.Decoder):
    """A tf.Example decoder for classification task."""

    def __init__(self):
      """Initializes the decoder.

      The constructor defines the mapping between the field name and the value
      from an input tf.Example. For example, we define two fields for image
      bytes and labels. There is no limit on the number of fields to decode.
      """
      self._keys_to_features = {
          'image/encoded':
              tf.io.FixedLenFeature((), tf.string, default_value=''),
          'image/class/label':
              tf.io.FixedLenFeature((), tf.int64, default_value=-1)
      }
  ```

- The [Parser](example_input.py#L68) parses the decoded tensors and
  pre-processes the input data, such as image decoding, augmentation, and
  resizing. It should have `_parse_train_data` and `_parse_eval_data`
  functions, which return the processed images and labels; a sketch follows
  this list.
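A hedged Parser sketch reusing the field names from the Decoder above; the
real example_input.py may structure its preprocessing differently:

```python
import tensorflow as tf
from official.vision.beta.dataloaders import parser


class Parser(parser.Parser):
  """Parses and preprocesses decoded classification examples (sketch)."""

  def __init__(self, output_size=(224, 224)):
    self._output_size = output_size

  def _preprocess(self, decoded_tensors, is_training):
    image = tf.io.decode_jpeg(decoded_tensors['image/encoded'], channels=3)
    if is_training:
      image = tf.image.random_flip_left_right(image)  # simple augmentation
    image = tf.image.resize(image, self._output_size)
    image = tf.cast(image, tf.float32) / 255.0
    label = tf.cast(decoded_tensors['image/class/label'], tf.int32)
    return image, label

  def _parse_train_data(self, decoded_tensors):
    return self._preprocess(decoded_tensors, is_training=True)

  def _parse_eval_data(self, decoded_tensors):
    return self._preprocess(decoded_tensors, is_training=False)
```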
## Create Config

Next, you will define configs for your project. All configs are defined as
`dataclass` objects and can have default parameter values.

First, you will define your [`ExampleDataConfig`](example_config.py#L27). It
inherits from `config_definitions.DataConfig`, which already defines a few
common fields, like `input_path`, `file_type`, and `global_batch_size`. You
can add more fields to your own config as needed.

You can then define your model config,
[`ExampleModel`](example_config.py#L39), which inherits from
`hyperparams.Config`. Expose your own model parameters here. You can then
define your `Loss` and `Evaluation` configs.

Next, you will put all the above configs into an
[`ExampleTask`](example_config.py#L56) config. Here you list the configs for
your data, model, loss, evaluation, etc.

Finally, you can define a
[`tf_vision_example_experiment`](example_config.py#L66), which creates a
template for your experiments and fills it with default parameters. These
default parameter values can be overridden by a YAML file, like
[example_config_tpu.yaml](example_config_tpu.yaml). Also, make sure you give
your experiment template a unique name via the decorator:

```python
@exp_factory.register_config_factory('tf_vision_example_experiment')
def tf_vision_example_experiment() -> cfg.ExperimentConfig:
  """Definition of a full example experiment."""
  # Create and return experiment template.
```
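For illustration, a minimal data-config sketch following the pattern described
above; any fields beyond what `config_definitions.DataConfig` already provides
are assumptions:

```python
import dataclasses
from official.core import config_definitions as cfg


@dataclasses.dataclass
class ExampleDataConfig(cfg.DataConfig):
  """Input config for the example project (illustrative defaults)."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = True
  file_type: str = 'tfrecord'
```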
## Create Task

A task is a class that encapsulates the logic of loading data, building
models, and performing one-step training and validation. It connects all
components together and is called by the base
[Trainer](https://github.com/tensorflow/models/blob/master/official/core/base_trainer.py).

You can create your own task by inheriting from the base
[Task](https://github.com/tensorflow/models/blob/master/official/core/base_task.py),
or from one of the
[tasks](https://github.com/tensorflow/models/blob/master/official/vision/beta/tasks/)
we have already defined, if most of the operations can be reused. An
`ExampleTask` inheriting from
[ImageClassificationTask](https://github.com/tensorflow/models/blob/master/official/vision/beta/tasks/image_classification.py#L32)
can be found [here](example_task.py).

We walk through the important components of the task below (a condensed
`train_step` sketch follows this list):

- `build_model`: instantiates a model you have defined above. It is also good
  practice to run a forward pass with a dummy input to ensure the layers
  within the model are properly initialized.
- `build_inputs`: instantiates a `Decoder` object and a `Parser` object. They
  are used to create an `InputReader` that will generate a `tf.data.Dataset`
  object.
- `build_losses`: takes groundtruth labels and model outputs as input, and
  computes the loss. It is called in `train_step` and `validation_step`. You
  can also define different losses for training and validation, for example,
  `build_train_losses` and `build_validation_losses`; just make sure they are
  called properly by the corresponding functions.
- `build_metrics`: defines your own metrics. It should return a list of
  `tf.keras.metrics.Metric` objects. You can create your own metric class by
  subclassing `tf.keras.metrics.Metric`.
- `train_step` and `validation_step`: perform one-step training and
  validation. They take one batch of training/validation data, run the
  forward pass, gather losses, and update metrics. They assume the data
  format is consistent with the `Parser` output. `train_step` also contains
  the backward pass that updates the model weights.
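A condensed, hedged sketch of the `train_step` flow described above; the real
`ExampleTask` in example_task.py is the authoritative version:

```python
import tensorflow as tf


def train_step(self, inputs, model, optimizer, metrics=None):
  """One optimization step: forward pass, loss, backward pass, metrics."""
  features, labels = inputs
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    loss = self.build_losses(labels=labels, model_outputs=outputs)
  grads = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  logs = {'loss': loss}
  if metrics:
    for metric in metrics:
      metric.update_state(labels, outputs)
  return logs
```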
## Import registry

To use your custom dataloaders, models, tasks, etc., you need to register them
properly. The recommended way is to have a single file that imports all
relevant modules, for example, [registry_imports.py](registry_imports.py). You
can see that in this file we import all of our custom components:

```python
# pylint: disable=unused-import
from official.common import registry_imports
from official.vision.beta.projects.example import example_config
from official.vision.beta.projects.example import example_input
from official.vision.beta.projects.example import example_model
from official.vision.beta.projects.example import example_task
```
## Training

You can create your own trainer by branching from our core
[trainer](https://github.com/tensorflow/models/blob/master/official/vision/beta/train.py).
Just make sure you import the registry like this:

```python
from official.vision.beta.projects.example import registry_imports  # pylint: disable=unused-import
```

You can run training locally for testing purposes:

```bash
# Assume you are under official/vision/beta/projects.
python3 example/train.py \
  --experiment=tf_vision_example_experiment \
  --config_file=${PWD}/example/example_config_local.yaml \
  --mode=train \
  --model_dir=/tmp/tfvision_test/
```

Training can also run on Google Cloud using Cloud TPU.
[Here](https://cloud.google.com/tpu/docs/how-to) are the instructions for
using Cloud TPU, and here is a more detailed
[tutorial](https://cloud.google.com/tpu/docs/tutorials/resnet-rs-2.x) on
training a ResNet-RS model. Follow the instructions to set up Cloud TPU and
launch training with:

```bash
EXP_TYPE=tf_vision_example_experiment  # Must match the registered name of your experiment template.
EXP_NAME=exp_001  # You can give any name to the experiment.
TPU_NAME=experiment01

# Now launch the experiment.
python3 example/train.py \
  --experiment=$EXP_TYPE \
  --mode=train \
  --tpu=$TPU_NAME \
  --model_dir=/tmp/tfvision_test/ \
  --config_file=third_party/tensorflow_models/official/vision/beta/projects/example/example_config_tpu.yaml
```
official/vision/beta/projects/movinet/README.md  (+2, -2)

@@ -338,7 +338,7 @@ with the Python API:
 ```python
 # Create the interpreter and signature runner
 interpreter = tf.lite.Interpreter('/tmp/movinet_a0_stream.tflite')
-signature = interpreter.get_signature_runner()
+runner = interpreter.get_signature_runner()

 # Extract state names and create the initial (zero) states
 def state_name(name: str) -> str:

@@ -358,7 +358,7 @@ clips = tf.split(video, video.shape[1], axis=1)
 states = init_states
 for clip in clips:
   # Input shape: [1, 1, 172, 172, 3]
-  outputs = signature(**states, image=clip)
+  outputs = runner(**states, image=clip)
   logits = outputs.pop('logits')
   states = outputs
 ```
official/vision/beta/projects/movinet/export_saved_model_test.py  (+2, -2)

@@ -121,7 +121,7 @@ class ExportSavedModelTest(tf.test.TestCase):
     tflite_model = converter.convert()

     interpreter = tf.lite.Interpreter(model_content=tflite_model)
-    signature = interpreter.get_signature_runner()
+    runner = interpreter.get_signature_runner('serving_default')

     def state_name(name: str) -> str:
       return name[len('serving_default_'):-len(':0')]

@@ -137,7 +137,7 @@ class ExportSavedModelTest(tf.test.TestCase):
     states = init_states
     for clip in clips:
-      outputs = signature(**states, image=clip)
+      outputs = runner(**states, image=clip)
       logits = outputs.pop('logits')
       states = outputs
official/vision/beta/projects/movinet/modeling/movinet.py  (+24, -21)

@@ -17,10 +17,10 @@
 Reference: https://arxiv.org/pdf/2103.11511.pdf
 """

+import dataclasses
 import math
 from typing import Dict, Mapping, Optional, Sequence, Tuple, Union

-import dataclasses
 import tensorflow as tf

 from official.modeling import hyperparams

@@ -454,7 +454,7 @@ class Movinet(tf.keras.Model):
     stochastic_depth_idx = 1
     for block_idx, block in enumerate(self._block_specs):
       if isinstance(block, StemSpec):
-        x, states = movinet_layers.Stem(
+        layer_obj = movinet_layers.Stem(
             block.filters,
             block.kernel_size,
             block.strides,

@@ -466,9 +466,9 @@ class Movinet(tf.keras.Model):
             batch_norm_layer=self._norm,
             batch_norm_momentum=self._norm_momentum,
             batch_norm_epsilon=self._norm_epsilon,
-            state_prefix='state/stem',
-            name='stem')(x, states=states)
+            state_prefix='state_stem',
+            name='stem')
+        x, states = layer_obj(x, states=states)
         endpoints['stem'] = x
       elif isinstance(block, MovinetBlockSpec):
         if not (len(block.expand_filters) == len(block.kernel_sizes) ==

@@ -486,8 +486,8 @@ class Movinet(tf.keras.Model):
               self._stochastic_depth_drop_rate * stochastic_depth_idx /
               num_layers)
           expand_filters, kernel_size, strides = layer
-          name = f'b{block_idx - 1}/l{layer_idx}'
-          x, states = movinet_layers.MovinetBlock(
+          name = f'block{block_idx - 1}_layer{layer_idx}'
+          layer_obj = movinet_layers.MovinetBlock(
               block.base_filters,
               expand_filters,
               kernel_size=kernel_size,

@@ -505,13 +505,14 @@ class Movinet(tf.keras.Model):
               batch_norm_layer=self._norm,
               batch_norm_momentum=self._norm_momentum,
               batch_norm_epsilon=self._norm_epsilon,
-              state_prefix=f'state/{name}',
-              name=name)(x, states=states)
+              state_prefix=f'state_{name}',
+              name=name)
+          x, states = layer_obj(x, states=states)
           endpoints[name] = x
           stochastic_depth_idx += 1
       elif isinstance(block, HeadSpec):
-        x, states = movinet_layers.Head(
+        layer_obj = movinet_layers.Head(
             project_filters=block.project_filters,
             conv_type=self._conv_type,
             activation=self._activation,

@@ -520,9 +521,9 @@ class Movinet(tf.keras.Model):
             batch_norm_layer=self._norm,
             batch_norm_momentum=self._norm_momentum,
             batch_norm_epsilon=self._norm_epsilon,
-            state_prefix='state/head',
-            name='head')(x, states=states)
+            state_prefix='state_head',
+            name='head')
+        x, states = layer_obj(x, states=states)
         endpoints['head'] = x
       else:
         raise ValueError('Unknown block type {}'.format(block))

@@ -567,7 +568,7 @@ class Movinet(tf.keras.Model):
     for block_idx, block in enumerate(block_specs):
       if isinstance(block, StemSpec):
         if block.kernel_size[0] > 1:
-          states['state/stem/stream_buffer'] = (
+          states['state_stem_stream_buffer'] = (
              input_shape[0],
              input_shape[1],
              divide_resolution(input_shape[2], num_downsamples),

@@ -590,8 +591,10 @@ class Movinet(tf.keras.Model):
              self._conv_type in ['2plus1d', '3d_2plus1d']):
            num_downsamples += 1

+          prefix = f'state_block{block_idx}_layer{layer_idx}'
+
           if kernel_size[0] > 1:
-            states[f'state/b{block_idx}/l{layer_idx}/stream_buffer'] = (
+            states[f'{prefix}_stream_buffer'] = (
                 input_shape[0],
                 kernel_size[0] - 1,
                 divide_resolution(input_shape[2], num_downsamples),

@@ -599,13 +602,13 @@ class Movinet(tf.keras.Model):
                 expand_filters,
             )
-          states[f'state/b{block_idx}/l{layer_idx}/pool_buffer'] = (
+          states[f'{prefix}_pool_buffer'] = (
               input_shape[0], 1, 1, 1, expand_filters,
           )
-          states[f'state/b{block_idx}/l{layer_idx}/pool_frame_count'] = (1,)
+          states[f'{prefix}_pool_frame_count'] = (1,)

           if use_positional_encoding:
-            name = f'state/b{block_idx}/l{layer_idx}/pos_enc_frame_count'
+            name = f'{prefix}_pos_enc_frame_count'
             states[name] = (1,)

           if strides[1] != strides[2]:

@@ -618,10 +621,10 @@ class Movinet(tf.keras.Model):
             self._conv_type not in ['2plus1d', '3d_2plus1d']):
           num_downsamples += 1
       elif isinstance(block, HeadSpec):
-        states['state/head/pool_buffer'] = (
+        states['state_head_pool_buffer'] = (
            input_shape[0], 1, 1, 1, block.project_filters,
        )
-        states['state/head/pool_frame_count'] = (1,)
+        states['state_head_pool_frame_count'] = (1,)

     return states
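With the renaming above, stream-buffer state keys are flat,
underscore-separated names instead of slash-separated scopes. A hedged sketch
of the resulting usage, mirroring the tests below and assuming the backbone
exposes an `init_states`-style helper:

```python
import tensorflow as tf

inputs = tf.ones([1, 8, 128, 128, 3])
init_states = backbone.init_states(tf.shape(inputs))  # assumed helper

# Keys now look like:
#   'state_stem_stream_buffer'
#   'state_block0_layer0_pool_buffer'
#   'state_head_pool_frame_count'
endpoints, new_states = backbone({**init_states, 'image': inputs})
```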
official/vision/beta/projects/movinet/modeling/movinet_layers.py  (+2, -2)

@@ -478,7 +478,7 @@ class StreamBuffer(tf.keras.layers.Layer):
     state_prefix = state_prefix if state_prefix is not None else ''
     self._state_prefix = state_prefix
-    self._state_name = f'{state_prefix}/stream_buffer'
+    self._state_name = f'{state_prefix}_stream_buffer'
     self._buffer_size = buffer_size

   def get_config(self):

@@ -501,7 +501,7 @@ class StreamBuffer(tf.keras.layers.Layer):
       inputs: the input tensor.
       states: a dict of states such that, if any of the keys match for this
         layer, will overwrite the contents of the buffer(s).
-        Expected keys include `state_prefix + '/stream_buffer'`.
+        Expected keys include `state_prefix + '_stream_buffer'`.

     Returns:
       the output tensor and states
official/vision/beta/projects/movinet/modeling/movinet_test.py  (+10, -10)

@@ -35,11 +35,11 @@ class MoViNetTest(parameterized.TestCase, tf.test.TestCase):
     endpoints, states = network(inputs)

     self.assertAllEqual(endpoints['stem'].shape, [1, 8, 64, 64, 8])
-    self.assertAllEqual(endpoints['b0/l0'].shape, [1, 8, 32, 32, 8])
-    self.assertAllEqual(endpoints['b1/l0'].shape, [1, 8, 16, 16, 32])
-    self.assertAllEqual(endpoints['b2/l0'].shape, [1, 8, 8, 8, 56])
-    self.assertAllEqual(endpoints['b3/l0'].shape, [1, 8, 8, 8, 56])
-    self.assertAllEqual(endpoints['b4/l0'].shape, [1, 8, 4, 4, 104])
+    self.assertAllEqual(endpoints['block0_layer0'].shape, [1, 8, 32, 32, 8])
+    self.assertAllEqual(endpoints['block1_layer0'].shape, [1, 8, 16, 16, 32])
+    self.assertAllEqual(endpoints['block2_layer0'].shape, [1, 8, 8, 8, 56])
+    self.assertAllEqual(endpoints['block3_layer0'].shape, [1, 8, 8, 8, 56])
+    self.assertAllEqual(endpoints['block4_layer0'].shape, [1, 8, 4, 4, 104])
     self.assertAllEqual(endpoints['head'].shape, [1, 1, 1, 1, 480])

     self.assertNotEmpty(states)

@@ -59,11 +59,11 @@ class MoViNetTest(parameterized.TestCase, tf.test.TestCase):
     endpoints, new_states = backbone({**init_states, 'image': inputs})

     self.assertAllEqual(endpoints['stem'].shape, [1, 8, 64, 64, 8])
-    self.assertAllEqual(endpoints['b0/l0'].shape, [1, 8, 32, 32, 8])
-    self.assertAllEqual(endpoints['b1/l0'].shape, [1, 8, 16, 16, 32])
-    self.assertAllEqual(endpoints['b2/l0'].shape, [1, 8, 8, 8, 56])
-    self.assertAllEqual(endpoints['b3/l0'].shape, [1, 8, 8, 8, 56])
-    self.assertAllEqual(endpoints['b4/l0'].shape, [1, 8, 4, 4, 104])
+    self.assertAllEqual(endpoints['block0_layer0'].shape, [1, 8, 32, 32, 8])
+    self.assertAllEqual(endpoints['block1_layer0'].shape, [1, 8, 16, 16, 32])
+    self.assertAllEqual(endpoints['block2_layer0'].shape, [1, 8, 8, 8, 56])
+    self.assertAllEqual(endpoints['block3_layer0'].shape, [1, 8, 8, 8, 56])
+    self.assertAllEqual(endpoints['block4_layer0'].shape, [1, 8, 4, 4, 104])
     self.assertAllEqual(endpoints['head'].shape, [1, 1, 1, 1, 480])

     self.assertNotEmpty(init_states)
official/vision/beta/projects/simclr/common/registry_imports.py
View file @ b92025a9
...
@@ -12,20 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
 """All necessary imports for registration."""
 # pylint: disable=unused-import
...
official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_multitask_tpu.yaml
0 → 100644
View file @ b92025a9
runtime:
  distribution_strategy: tpu
  mixed_precision_dtype: 'bfloat16'
task:
  init_checkpoint: ''
  model:
    backbone:
      resnet:
        model_id: 50
      type: resnet
    projection_head:
      ft_proj_idx: 1
      num_proj_layers: 3
      proj_output_dim: 128
    backbone_trainable: true
    heads: !!python/tuple
      # Define heads for the PRETRAIN networks here
      - task_name: pretrain_imagenet
        mode: pretrain
      # Define heads for the FINETUNE networks here
      - task_name: finetune_imagenet_10percent
        mode: finetune
        supervised_head:
          num_classes: 1001
          zero_init: true
    input_size: [224, 224, 3]
    l2_weight_decay: 0.0
    norm_activation:
      norm_epsilon: 1.0e-05
      norm_momentum: 0.9
      use_sync_bn: true
  task_routines: !!python/tuple
    # Define TASK CONFIG for the PRETRAIN networks here
    - task_name: pretrain_imagenet
      task_weight: 30.0
      task_config:
        evaluation:
          one_hot: true
          top_k: 5
        loss:
          l2_weight_decay: 0.0
          projection_norm: true
          temperature: 0.1
        model:
          input_size: [224, 224, 3]
          mode: pretrain
        train_data:
          input_path: /readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*
          # Set labels to zeros to double-check that no label is used during pretraining.
          input_set_label_to_zero: true
          is_training: true
          global_batch_size: 4096
          dtype: 'bfloat16'
          parser:
            aug_rand_hflip: true
            mode: pretrain
          decoder:
            decode_label: true
        validation_data:
          input_path: /readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*
          is_training: false
          global_batch_size: 2048
          dtype: 'bfloat16'
          drop_remainder: false
          parser:
            mode: pretrain
          decoder:
            decode_label: true
    # Define TASK CONFIG for the FINETUNE networks here
    - task_name: finetune_imagenet_10percent
      task_weight: 1.0
      task_config:
        evaluation:
          one_hot: true
          top_k: 5
        loss:
          l2_weight_decay: 0.0
          label_smoothing: 0.0
          one_hot: true
        model:
          input_size: [224, 224, 3]
          mode: finetune
          supervised_head:
            num_classes: 1001
            zero_init: true
        train_data:
          tfds_name: 'imagenet2012_subset/10pct'
          tfds_split: 'train'
          input_path: ''
          is_training: true
          global_batch_size: 1024
          dtype: 'bfloat16'
          parser:
            aug_rand_hflip: true
            mode: finetune
          decoder:
            decode_label: true
        validation_data:
          tfds_name: 'imagenet2012_subset/10pct'
          tfds_split: 'validation'
          input_path: ''
          is_training: false
          global_batch_size: 2048
          dtype: 'bfloat16'
          drop_remainder: false
          parser:
            mode: finetune
          decoder:
            decode_label: true
trainer:
  trainer_type: interleaving
  task_sampler:
    proportional:
      alpha: 1.0
    type: proportional
  train_steps: 32000  # 100 epochs
  validation_steps: 24  # NUM_EXAMPLES (50000) // global_batch_size
  validation_interval: 625
  steps_per_loop: 625  # NUM_EXAMPLES (1281167) // global_batch_size
  summary_interval: 625
  checkpoint_interval: 625
  max_to_keep: 3
  optimizer_config:
    learning_rate:
      cosine:
        decay_steps: 32000
        initial_learning_rate: 4.8
      type: cosine
    optimizer:
      lars:
        exclude_from_weight_decay: [batch_normalization, bias]
        momentum: 0.9
        weight_decay_rate: 1.0e-06
      type: lars
    warmup:
      linear:
        name: linear
        warmup_steps: 3200
      type: linear
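For context, a hedged sketch of how such an experiment YAML is typically consumed in the Model Garden. Assumptions: the experiment is registered under the name 'multitask_simclr' (the factory function appears in multitask_config.py below), and the standard hyperparams override helper is used, as in the repo's train utilities.

# Hedged sketch: load the registered experiment, then override it with this YAML.
from official.core import exp_factory
from official.modeling import hyperparams

config = exp_factory.get_exp_config('multitask_simclr')
config = hyperparams.override_params_dict(
    config,
    'official/vision/beta/projects/simclr/configs/experiments/'
    'imagenet_simclr_multitask_tpu.yaml',
    is_strict=False)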
official/vision/beta/projects/simclr/configs/multitask_config.py
View file @ b92025a9
...
@@ -29,6 +29,7 @@ from official.vision.beta.projects.simclr.modeling import simclr_model
 @dataclasses.dataclass
 class SimCLRMTHeadConfig(hyperparams.Config):
   """Per-task specific configs."""
+  task_name: str = 'task_name'
   # Supervised head is required for finetune, but optional for pretrain.
   supervised_head: simclr_configs.SupervisedHead = simclr_configs.SupervisedHead(
       num_classes=1001)
...
@@ -57,14 +58,17 @@ def multitask_simclr() -> multitask_configs.MultiTaskExperimentConfig:
   return multitask_configs.MultiTaskExperimentConfig(
       task=multitask_configs.MultiTaskConfig(
           model=SimCLRMTModelConfig(
-              heads=(SimCLRMTHeadConfig(mode=simclr_model.PRETRAIN),
-                     SimCLRMTHeadConfig(mode=simclr_model.FINETUNE))),
+              heads=(SimCLRMTHeadConfig(
+                  task_name='pretrain_simclr', mode=simclr_model.PRETRAIN),
+                     SimCLRMTHeadConfig(
+                         task_name='finetune_simclr',
+                         mode=simclr_model.FINETUNE))),
           task_routines=(
               multitask_configs.TaskRoutine(
-                  task_name=simclr_model.PRETRAIN,
+                  task_name='pretrain_simclr',
                   task_config=simclr_configs.SimCLRPretrainTask(),
                   task_weight=2.0),
               multitask_configs.TaskRoutine(
-                  task_name=simclr_model.FINETUNE,
+                  task_name='finetune_simclr',
                   task_config=simclr_configs.SimCLRFinetuneTask(),
                   task_weight=1.0))),
       trainer=multitask_configs.MultiTaskTrainerConfig())
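A short sketch of the effect of the new `task_name` field: heads and task routines are now paired by explicit name strings rather than by the mode constants. Calling the factory function directly is only illustrative; normally exp_factory resolves it by its registered name.

# Hedged sketch: head names and routine names line up after this change.
from official.vision.beta.projects.simclr.configs import multitask_config

exp = multitask_config.multitask_simclr()
head_names = [h.task_name for h in exp.task.model.heads]
routine_names = [r.task_name for r in exp.task.task_routines]
assert head_names == routine_names == ['pretrain_simclr', 'finetune_simclr']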
official/vision/beta/projects/simclr/configs/simclr.py
View file @ b92025a9
...
@@ -12,27 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
 """SimCLR configurations."""
-import dataclasses
 import os
 from typing import List, Optional

+import dataclasses
 from official.core import config_definitions as cfg
 from official.core import exp_factory
 from official.modeling import hyperparams
...
@@ -73,6 +57,9 @@ class DataConfig(cfg.DataConfig):
   # simclr specific configs
   parser: Parser = Parser()
   decoder: Decoder = Decoder()
+  # Useful for a sanity check that absolutely no labels are used during
+  # pretraining, by setting them to zeros (default = False, keep original
+  # labels).
+  input_set_label_to_zero: bool = False


 @dataclasses.dataclass
...
@@ -115,9 +102,7 @@ class SimCLRModel(hyperparams.Config):
   backbone: backbones.Backbone = backbones.Backbone(
       type='resnet', resnet=backbones.ResNet())
   projection_head: ProjectionHead = ProjectionHead(
-      proj_output_dim=128,
-      num_proj_layers=3,
-      ft_proj_idx=1)
+      proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1)
   supervised_head: SupervisedHead = SupervisedHead(num_classes=1001)
   norm_activation: common.NormActivation = common.NormActivation(
       norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)
...
@@ -201,9 +186,7 @@ def simclr_pretraining_imagenet() -> cfg.ExperimentConfig:
           backbone=backbones.Backbone(
               type='resnet', resnet=backbones.ResNet(model_id=50)),
           projection_head=ProjectionHead(
-              proj_output_dim=128,
-              num_proj_layers=3,
-              ft_proj_idx=1),
+              proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1),
           supervised_head=SupervisedHead(num_classes=1001),
           norm_activation=common.NormActivation(
               norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=True)),
...
@@ -233,10 +216,13 @@ def simclr_pretraining_imagenet() -> cfg.ExperimentConfig:
                 'optimizer': {
                     'type': 'lars',
                     'lars': {
-                        'momentum': 0.9,
-                        'weight_decay_rate': 0.000001,
-                        'exclude_from_weight_decay': [
-                            'batch_normalization', 'bias']
+                        'momentum':
+                            0.9,
+                        'weight_decay_rate':
+                            0.000001,
+                        'exclude_from_weight_decay': [
+                            'batch_normalization', 'bias'
+                        ]
                     }
                 },
                 'learning_rate': {
...
@@ -278,11 +264,8 @@ def simclr_finetuning_imagenet() -> cfg.ExperimentConfig:
           backbone=backbones.Backbone(
               type='resnet', resnet=backbones.ResNet(model_id=50)),
           projection_head=ProjectionHead(
-              proj_output_dim=128,
-              num_proj_layers=3,
-              ft_proj_idx=1),
+              proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1),
           supervised_head=SupervisedHead(num_classes=1001, zero_init=True),
           norm_activation=common.NormActivation(
               norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
           loss=ClassificationLosses(),
...
@@ -311,10 +294,13 @@ def simclr_finetuning_imagenet() -> cfg.ExperimentConfig:
                 'optimizer': {
                     'type': 'lars',
                     'lars': {
-                        'momentum': 0.9,
-                        'weight_decay_rate': 0.0,
-                        'exclude_from_weight_decay': [
-                            'batch_normalization', 'bias']
+                        'momentum':
+                            0.9,
+                        'weight_decay_rate':
+                            0.0,
+                        'exclude_from_weight_decay': [
+                            'batch_normalization', 'bias'
+                        ]
                     }
                 },
                 'learning_rate': {
...
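A hedged sketch of the new `input_set_label_to_zero` flag in use. The experiment name 'simclr_pretraining_imagenet' is an assumption based on the factory function shown in this file's hunks.

# Hedged sketch: enable the sanity-check flag on the pretraining experiment.
from official.core import exp_factory

config = exp_factory.get_exp_config('simclr_pretraining_imagenet')
# Zero out labels in the pretraining input pipeline to verify that no label
# information leaks into self-supervised training.
config.task.train_data.input_set_label_to_zero = True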
official/vision/beta/projects/simclr/configs/simclr_test.py
View file @ b92025a9
...
@@ -12,23 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for simclr."""
-# pylint: disable=unused-import
+"""Tests for SimCLR config."""
 from absl.testing import parameterized
 import tensorflow as tf
...
official/vision/beta/projects/simclr/dataloaders/preprocess_ops.py
View file @ b92025a9
...
@@ -12,20 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
 """Preprocessing ops."""
 import functools
 import tensorflow as tf
...
official/vision/beta/projects/simclr/dataloaders/simclr_input.py
View file @ b92025a9
...
@@ -12,20 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
 """Data parser and processing for SimCLR.

 For pre-training:
...