Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
b261ebb4
Commit
b261ebb4
authored
Oct 11, 2021
by
A. Unique TensorFlower
Browse files
Merge pull request #10286 from PurdueDualityLab:task_pr
PiperOrigin-RevId: 402338060
parents
ca431476
379d64c5
Changes
30
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1177 additions
and
156 deletions
+1177
-156
official/vision/beta/projects/yolo/README.md
official/vision/beta/projects/yolo/README.md
+6
-2
official/vision/beta/projects/yolo/common/registry_imports.py
...cial/vision/beta/projects/yolo/common/registry_imports.py
+15
-0
official/vision/beta/projects/yolo/configs/decoders.py
official/vision/beta/projects/yolo/configs/decoders.py
+46
-0
official/vision/beta/projects/yolo/configs/experiments/darknet/csp_darknet53.yaml
...jects/yolo/configs/experiments/darknet/csp_darknet53.yaml
+0
-0
official/vision/beta/projects/yolo/configs/experiments/darknet/csp_darknet53_tfds.yaml
.../yolo/configs/experiments/darknet/csp_darknet53_tfds.yaml
+0
-0
official/vision/beta/projects/yolo/configs/experiments/darknet/darknet53.yaml
.../projects/yolo/configs/experiments/darknet/darknet53.yaml
+0
-0
official/vision/beta/projects/yolo/configs/experiments/darknet/darknet53_tfds.yaml
...ects/yolo/configs/experiments/darknet/darknet53_tfds.yaml
+0
-0
official/vision/beta/projects/yolo/configs/experiments/yolov4-csp/tpu/640.yaml
...projects/yolo/configs/experiments/yolov4-csp/tpu/640.yaml
+74
-0
official/vision/beta/projects/yolo/configs/experiments/yolov4/tpu/512.yaml
...eta/projects/yolo/configs/experiments/yolov4/tpu/512.yaml
+138
-0
official/vision/beta/projects/yolo/configs/yolo.py
official/vision/beta/projects/yolo/configs/yolo.py
+510
-0
official/vision/beta/projects/yolo/dataloaders/yolo_input.py
official/vision/beta/projects/yolo/dataloaders/yolo_input.py
+4
-3
official/vision/beta/projects/yolo/losses/yolo_loss.py
official/vision/beta/projects/yolo/losses/yolo_loss.py
+77
-47
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
+1
-1
official/vision/beta/projects/yolo/modeling/backbones/darknet.py
...l/vision/beta/projects/yolo/modeling/backbones/darknet.py
+3
-0
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
...sion/beta/projects/yolo/modeling/decoders/yolo_decoder.py
+119
-0
official/vision/beta/projects/yolo/modeling/factory.py
official/vision/beta/projects/yolo/modeling/factory.py
+95
-0
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py
...beta/projects/yolo/modeling/layers/detection_generator.py
+5
-4
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
...al/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
+39
-1
official/vision/beta/projects/yolo/modeling/yolo_model.py
official/vision/beta/projects/yolo/modeling/yolo_model.py
+36
-94
official/vision/beta/projects/yolo/ops/mosaic.py
official/vision/beta/projects/yolo/ops/mosaic.py
+9
-4
No files found.
official/vision/beta/projects/yolo/README.md
View file @
b261ebb4
...
@@ -73,10 +73,14 @@ connected to a new, more powerful backbone if a person chose to.
...
@@ -73,10 +73,14 @@ connected to a new, more powerful backbone if a person chose to.
| Yolo-v3 spp |
| Yolo-v3 spp |
| Yolo-v4 |
| Yolo-v4 |
| Yolo-v4 tiny |
| Yolo-v4 tiny |
| Yolo-v4 csp |
| Yolo-v4 large |
## Models Zoo
## Requirements
[

](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
## Requirements
[

](https://github.com/tensorflow/tensorflow/releases/tag/v2.6.0)
[

](https://www.python.org/downloads/release/python-380/)
[

](https://www.python.org/downloads/release/python-380/)
...
...
official/vision/beta/projects/yolo/common/registry_imports.py
View file @
b261ebb4
...
@@ -15,7 +15,22 @@
...
@@ -15,7 +15,22 @@
"""All necessary imports for registration."""
"""All necessary imports for registration."""
# pylint: disable=unused-import
# pylint: disable=unused-import
# pylint: disable=g-bad-import-order
from
official.common
import
registry_imports
from
official.common
import
registry_imports
# import configs
from
official.vision.beta.projects.yolo.configs
import
darknet_classification
from
official.vision.beta.projects.yolo.configs
import
darknet_classification
from
official.vision.beta.projects.yolo.configs
import
yolo
as
yolo_config
# import modeling components
from
official.vision.beta.projects.yolo.modeling.backbones
import
darknet
from
official.vision.beta.projects.yolo.modeling.backbones
import
darknet
from
official.vision.beta.projects.yolo.modeling.decoders
import
yolo_decoder
# import tasks
from
official.vision.beta.projects.yolo.tasks
import
image_classification
from
official.vision.beta.projects.yolo.tasks
import
image_classification
from
official.vision.beta.projects.yolo.tasks
import
yolo
as
yolo_task
# import optimization packages
from
official.vision.beta.projects.yolo.optimization
import
optimizer_factory
from
official.vision.beta.projects.yolo.optimization.configs
import
optimizer_config
from
official.vision.beta.projects.yolo.optimization.configs
import
optimization_config
official/vision/beta/projects/yolo/configs/decoders.py
0 → 100755
View file @
b261ebb4
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Decoders configurations."""
import
dataclasses
from
typing
import
Optional
from
official.modeling
import
hyperparams
from
official.vision.beta.configs
import
decoders
@dataclasses.dataclass
class YoloDecoder(hyperparams.Config):
  """Hyperparameters describing the YOLO decoder.

  Per the original note on this config: when a version or a name is given,
  the remaining input parameters are ignored and the defaults tied to that
  version and name are used instead.
  """
  # Preset selectors. NOTE(review): the experiment YAMLs in this change use
  # version 'v4' with type 'csp' or 'regular'; the full set of accepted
  # values is defined by the decoder implementation, not here.
  version: Optional[str] = None
  type: Optional[str] = None
  # Individual decoder options. None presumably means "take the value from
  # the selected preset" -- confirm against the decoder builder.
  use_fpn: Optional[bool] = None
  use_spatial_attention: bool = False
  use_separable_conv: bool = False
  csp_stack: Optional[bool] = None
  fpn_depth: Optional[int] = None
  fpn_filter_scale: Optional[int] = None
  path_process_len: Optional[int] = None
  max_level_process_len: Optional[int] = None
  embed_spp: Optional[bool] = None
  # NOTE(review): 'same' looks like a sentinel for "reuse the activation
  # configured elsewhere" -- confirm where it is resolved.
  activation: Optional[str] = 'same'
@dataclasses.dataclass
class Decoder(decoders.Decoder):
  """One-of decoder config extended with the YOLO decoder option."""
  # Name of the active decoder entry; defaults to the YOLO decoder below.
  type: Optional[str] = 'yolo_decoder'
  # Built through a factory rather than a shared `YoloDecoder()` instance:
  # Python 3.11+ dataclasses reject unhashable instances as field defaults,
  # and a factory also gives each `Decoder` its own sub-config object.
  yolo_decoder: YoloDecoder = dataclasses.field(default_factory=YoloDecoder)
official/vision/beta/projects/yolo/configs/experiments/csp_darknet53.yaml
→
official/vision/beta/projects/yolo/configs/experiments/
darknet/
csp_darknet53.yaml
View file @
b261ebb4
File moved
official/vision/beta/projects/yolo/configs/experiments/csp_darknet53_tfds.yaml
→
official/vision/beta/projects/yolo/configs/experiments/
darknet/
csp_darknet53_tfds.yaml
View file @
b261ebb4
File moved
official/vision/beta/projects/yolo/configs/experiments/darknet53.yaml
→
official/vision/beta/projects/yolo/configs/experiments/
darknet/
darknet53.yaml
View file @
b261ebb4
File moved
official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml
→
official/vision/beta/projects/yolo/configs/experiments/
darknet/
darknet53_tfds.yaml
View file @
b261ebb4
File moved
official/vision/beta/projects/yolo/configs/experiments/yolov4-csp/tpu/640.yaml
0 → 100644
View file @
b261ebb4
# --experiment_type=scaled_yolo
# mAP 47.6
runtime
:
distribution_strategy
:
'
tpu'
mixed_precision_dtype
:
'
float32'
tpu_enable_xla_dynamic_padder
:
false
task
:
model
:
input_size
:
[
640
,
640
,
3
]
backbone
:
type
:
'
darknet'
darknet
:
model_id
:
'
altered_cspdarknet53'
max_level
:
5
min_level
:
3
decoder
:
type
:
yolo_decoder
yolo_decoder
:
version
:
v4
type
:
csp
head
:
smart_bias
:
true
detection_generator
:
box_type
:
'
all'
:
scaled
scale_xy
:
'
all'
:
2.0
max_boxes
:
300
nms_type
:
iou
iou_thresh
:
0.001
nms_thresh
:
0.60
loss
:
use_scaled_loss
:
true
update_on_repeat
:
true
box_loss_type
:
'
all'
:
ciou
ignore_thresh
:
'
all'
:
0.0
iou_normalizer
:
'
all'
:
0.05
cls_normalizer
:
'
all'
:
0.3
object_normalizer
:
'
5'
:
0.28
'
4'
:
0.70
'
3'
:
2.80
objectness_smooth
:
'
all'
:
1.0
norm_activation
:
use_sync_bn
:
true
num_classes
:
80
anchor_boxes
:
anchors_per_scale
:
3
boxes
:
[
box
:
[
12
,
16
],
box
:
[
19
,
36
],
box
:
[
40
,
28
],
box
:
[
36
,
75
],
box
:
[
76
,
55
],
box
:
[
72
,
146
],
box
:
[
142
,
110
],
box
:
[
192
,
243
],
box
:
[
459
,
401
]]
train_data
:
input_path
:
'
/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
shuffle_buffer_size
:
10000
parser
:
mosaic
:
mosaic_frequency
:
1.0
mixup_frequency
:
0.0
mosaic_crop_mode
:
'
scale'
mosaic_center
:
0.25
aug_scale_min
:
0.1
aug_scale_max
:
1.9
max_num_instances
:
300
letter_box
:
true
random_flip
:
true
aug_rand_translate
:
0.1
area_thresh
:
0.1
validation_data
:
input_path
:
'
/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
official/vision/beta/projects/yolo/configs/experiments/yolov4/tpu/512.yaml
0 → 100755
View file @
b261ebb4
# --experiment_type=yolo_darknet
# mAP 43.0
runtime
:
distribution_strategy
:
'
tpu'
mixed_precision_dtype
:
'
bfloat16'
task
:
smart_bias_lr
:
0.0
model
:
darknet_based_model
:
true
input_size
:
[
512
,
512
,
3
]
backbone
:
type
:
'
darknet'
darknet
:
model_id
:
'
cspdarknet53'
max_level
:
5
min_level
:
3
decoder
:
type
:
yolo_decoder
yolo_decoder
:
version
:
v4
type
:
regular
activation
:
leaky
head
:
smart_bias
:
true
detection_generator
:
box_type
:
'
all'
:
original
scale_xy
:
'
5'
:
1.05
'
4'
:
1.1
'
3'
:
1.2
max_boxes
:
200
nms_type
:
iou
iou_thresh
:
0.001
nms_thresh
:
0.60
loss
:
use_scaled_loss
:
false
box_loss_type
:
'
all'
:
ciou
ignore_thresh
:
'
all'
:
0.7
iou_normalizer
:
'
all'
:
0.07
cls_normalizer
:
'
all'
:
1.0
object_normalizer
:
'
all'
:
1.0
objectness_smooth
:
'
all'
:
0.0
max_delta
:
'
all'
:
5.0
norm_activation
:
activation
:
mish
norm_epsilon
:
0.0001
norm_momentum
:
0.99
use_sync_bn
:
true
num_classes
:
80
anchor_boxes
:
anchors_per_scale
:
3
boxes
:
[
box
:
[
12
,
16
],
box
:
[
19
,
36
],
box
:
[
40
,
28
],
box
:
[
36
,
75
],
box
:
[
76
,
55
],
box
:
[
72
,
146
],
box
:
[
142
,
110
],
box
:
[
192
,
243
],
box
:
[
459
,
401
]]
train_data
:
global_batch_size
:
64
dtype
:
float32
input_path
:
'
/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
is_training
:
true
drop_remainder
:
true
seed
:
1000
parser
:
mosaic
:
mosaic_frequency
:
0.75
mixup_frequency
:
0.0
mosaic_crop_mode
:
'
crop'
mosaic_center
:
0.2
aug_scale_min
:
0.2
aug_scale_max
:
1.6
jitter
:
0.3
max_num_instances
:
200
letter_box
:
false
random_flip
:
true
aug_rand_saturation
:
1.5
aug_rand_brightness
:
1.5
aug_rand_hue
:
0.1
aug_scale_min
:
0.1
aug_scale_max
:
1.9
aug_rand_translate
:
0.0
jitter
:
0.3
area_thresh
:
0.1
random_pad
:
true
use_tie_breaker
:
true
anchor_thresh
:
0.4
validation_data
:
global_batch_size
:
8
dtype
:
float32
input_path
:
'
/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
is_training
:
false
drop_remainder
:
true
parser
:
max_num_instances
:
200
letter_box
:
false
use_tie_breaker
:
true
anchor_thresh
:
0.4
weight_decay
:
0.000
init_checkpoint
:
'
gs://tf_model_garden/vision/yolo/ckpt-15000'
init_checkpoint_modules
:
'
backbone'
annotation_file
:
null
trainer
:
train_steps
:
555000
validation_steps
:
625
steps_per_loop
:
1850
summary_interval
:
1850
validation_interval
:
9250
checkpoint_interval
:
1850
optimizer_config
:
ema
:
average_decay
:
0.9998
trainable_weights_only
:
false
dynamic_decay
:
true
learning_rate
:
type
:
stepwise
stepwise
:
boundaries
:
[
400000
]
name
:
PiecewiseConstantDecay
values
:
[
0.00131
,
0.000131
]
optimizer
:
type
:
sgd_torch
sgd_torch
:
momentum
:
0.949
momentum_start
:
0.949
nesterov
:
true
warmup_steps
:
1000
weight_decay
:
0.0005
name
:
SGD
warmup
:
type
:
'
linear'
linear
:
warmup_steps
:
1000
# learning rate rises from 0 to 0.0013 over 1000 steps
official/vision/beta/projects/yolo/configs/yolo.py
0 → 100755
View file @
b261ebb4
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""YOLO configuration definition."""
import
dataclasses
import
os
from
typing
import
Any
,
List
,
Optional
,
Union
import
numpy
as
np
from
official.core
import
config_definitions
as
cfg
from
official.core
import
exp_factory
from
official.modeling
import
hyperparams
from
official.vision.beta.configs
import
common
from
official.vision.beta.projects.yolo
import
optimization
from
official.vision.beta.projects.yolo.configs
import
backbones
from
official.vision.beta.projects.yolo.configs
import
decoders
# pytype: disable=annotation-type-mismatch
# Inclusive range of levels for which per-level default entries are generated
# by the `_build_dict` / `_build_path_scales` helpers in this file.
MIN_LEVEL = 1
MAX_LEVEL = 7
# Value assigned to `YoloTask.seed`.
GLOBAL_SEED = 1000
def
_build_dict
(
min_level
,
max_level
,
value
):
vals
=
{
str
(
key
):
value
for
key
in
range
(
min_level
,
max_level
+
1
)}
vals
[
'all'
]
=
None
return
lambda
:
vals
def
_build_path_scales
(
min_level
,
max_level
):
return
lambda
:
{
str
(
key
):
2
**
key
for
key
in
range
(
min_level
,
max_level
+
1
)}
@dataclasses.dataclass
class FPNConfig(hyperparams.Config):
  """Per-level setting with an optional value shared by every level."""
  # When not None, `get()` reports this value for every other key.
  all: Optional[Any] = None

  def get(self):
    """Returns the config as a dict, with 'all' applied to each key if set.

    A value can be supplied once under the 'all' key or separately per level.
    When 'all' is not None it replaces the value of every other key in the
    returned dict; otherwise the stored values are returned as they are.
    """
    config = self.as_dict()
    shared = config.get('all')
    if shared is None:
      return config
    for level in config:
      if level != 'all':
        config[level] = shared
    return config
# pylint: disable=missing-class-docstring
@dataclasses.dataclass
class TfExampleDecoder(hyperparams.Config):
  """Options for the 'simple_decoder' entry of `DataDecoder`."""
  regenerate_source_id: bool = False
  # NOTE(review): presumably remaps the 91 COCO category ids onto the 80
  # trainable classes -- confirm in the decoder that consumes this flag.
  coco91_to_80: bool = True
@dataclasses.dataclass
class TfExampleDecoderLabelMap(hyperparams.Config):
  """Options for the 'label_map_decoder' entry of `DataDecoder`."""
  regenerate_source_id: bool = False
  # NOTE(review): presumably a path to a label map file; empty by default.
  label_map: str = ''
@dataclasses.dataclass
class DataDecoder(hyperparams.OneOfConfig):
  """One-of config choosing how input examples are decoded."""
  # Name of the active entry below.
  type: Optional[str] = 'simple_decoder'
  # Sub-configs are created through factories instead of shared default
  # instances: Python 3.11+ dataclasses reject unhashable instances as field
  # defaults, and a factory gives every `DataDecoder` its own objects.
  simple_decoder: TfExampleDecoder = dataclasses.field(
      default_factory=TfExampleDecoder)
  label_map_decoder: TfExampleDecoderLabelMap = dataclasses.field(
      default_factory=TfExampleDecoderLabelMap)
@dataclasses.dataclass
class Mosaic(hyperparams.Config):
  """Settings for the mosaic / mixup augmentation stage."""
  # With both frequencies at 0.0 (the defaults) the augmentation is
  # presumably disabled -- confirm in the mosaic op.
  mosaic_frequency: float = 0.0
  mixup_frequency: float = 0.0
  mosaic_center: float = 0.2
  # The experiment configs in this file set this to 'crop' or 'scale'.
  mosaic_crop_mode: Optional[str] = None
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  jitter: float = 0.0
@dataclasses.dataclass
class Parser(hyperparams.Config):
  """Settings for the YOLO input parser (preprocessing and augmentation)."""
  max_num_instances: int = 200
  # Whether the aspect ratio of the images is preserved regardless of the
  # preprocessing ops used (per the dataloader Parser docstring).
  letter_box: Optional[bool] = True
  # Whether to randomly flip the image horizontally.
  random_flip: bool = True
  # Whether to use padding to apply random translation (true for darknet
  # yolo, false for scaled yolo). This is a boolean flag; it was previously
  # mis-annotated as `float` while defaulting to False.
  random_pad: bool = False
  # Maximum change in aspect ratio.
  jitter: float = 0.0
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  # Maximum scaling values; the dataloader scales between 1/value and value.
  aug_rand_saturation: float = 0.0
  aug_rand_brightness: float = 0.0
  aug_rand_hue: float = 0.0
  aug_rand_angle: float = 0.0
  aug_rand_translate: float = 0.0
  aug_rand_perspective: float = 0.0
  use_tie_breaker: bool = True
  best_match_only: bool = False
  anchor_thresh: float = -0.01
  area_thresh: float = 0.1
  # Created through a factory instead of a shared `Mosaic()` instance:
  # Python 3.11+ dataclasses reject unhashable instances as field defaults,
  # and a factory gives every `Parser` its own mosaic settings object.
  mosaic: Mosaic = dataclasses.field(default_factory=Mosaic)
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  # This field used to be declared twice in this class (first as 64, later
  # as 1). The later assignment silently won, so 1 was always the effective
  # default; only that declaration is kept. The experiment factories in this
  # file pass their batch sizes explicitly.
  global_batch_size: int = 1
  input_path: str = ''
  tfds_name: str = ''
  tfds_split: str = ''
  is_training: bool = True
  dtype: str = 'float16'
  # Sub-configs are created through factories instead of shared default
  # instances: Python 3.11+ dataclasses reject unhashable instances as field
  # defaults, and a factory gives every `DataConfig` its own objects.
  decoder: DataDecoder = dataclasses.field(default_factory=DataDecoder)
  parser: Parser = dataclasses.field(default_factory=Parser)
  shuffle_buffer_size: int = 10000
  tfds_download: bool = True
  cache: bool = False
  drop_remainder: bool = True
@dataclasses.dataclass
class YoloHead(hyperparams.Config):
  """Hyperparameters of the YOLO prediction head."""
  # NOTE(review): what "smart" bias initialization does is defined by the
  # head implementation, not in this file.
  smart_bias: bool = True
@dataclasses.dataclass
class YoloDetectionGenerator(hyperparams.Config):
  """Settings for turning raw head outputs into final detections."""
  # Per-level settings: one entry per level in [MIN_LEVEL, MAX_LEVEL] plus an
  # 'all' entry. The factories return plain dicts although the fields are
  # annotated as FPNConfig -- presumably converted by the config base class;
  # confirm.
  box_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'original'))
  scale_xy: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Maps each level L to 2**L.
  path_scales: FPNConfig = dataclasses.field(
      default_factory=_build_path_scales(MIN_LEVEL, MAX_LEVEL))
  # Non-max suppression settings.
  nms_type: str = 'greedy'
  iou_thresh: float = 0.001
  nms_thresh: float = 0.6
  max_boxes: int = 200
  pre_nms_points: int = 5000
@dataclasses.dataclass
class YoloLoss(hyperparams.Config):
  """Hyperparameters of the YOLO loss."""
  # Per-level settings: one entry per level in [MIN_LEVEL, MAX_LEVEL] plus an
  # 'all' entry that, when set, applies to every level (see FPNConfig.get).
  ignore_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  truth_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  box_loss_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'ciou'))
  iou_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  cls_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  object_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # np.inf as the default, i.e. no finite bound on the delta.
  max_delta: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, np.inf))
  objectness_smooth: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  # Settings that are not per-level.
  label_smoothing: float = 0.0
  use_scaled_loss: bool = True
  update_on_repeat: bool = True
@dataclasses.dataclass
class Box(hyperparams.Config):
  """A single anchor box, stored as a list of ints."""
  # `default_factory=list` gives each Box its own empty list. The previous
  # `default=list` made the default value the `list` class object itself
  # rather than an empty list.
  box: List[int] = dataclasses.field(default_factory=list)
@dataclasses.dataclass
class AnchorBoxes(hyperparams.Config):
  """Anchor boxes and how they are divided among the output levels."""
  boxes: Optional[List[Box]] = None
  level_limits: Optional[List[int]] = None
  anchors_per_scale: int = 3

  def get(self, min_level, max_level):
    """Splits the anchors, in order, into one group per output level.

    When `level_limits` is set, the configured boxes are ignored: every level
    gets a single [1.0, 1.0] placeholder box and `self.anchors_per_scale` is
    overwritten with 1 as a side effect.

    Args:
      min_level: `int` the lowest output level.
      max_level: `int` the highest output level.

    Returns:
      anchors_per_level: A dict keyed by the level number as a string; each
        value is the list of anchor boxes assigned to that level.
      self.level_limits: A `List[int]` of the box size limits to link to each
        level under anchor free conditions, or None.
    """
    if self.level_limits is not None:
      boxes = [[1.0, 1.0]] * ((max_level - min_level) + 1)
      self.anchors_per_scale = 1
    else:
      boxes = [entry.box for entry in self.boxes]

    anchors_per_level = {}
    # Consecutive, non-overlapping windows of `anchors_per_scale` boxes.
    for offset, level in enumerate(range(min_level, max_level + 1)):
      begin = offset * self.anchors_per_scale
      end = begin + self.anchors_per_scale
      anchors_per_level[str(level)] = boxes[begin:end]
    return anchors_per_level, self.level_limits
@dataclasses.dataclass
class Yolo(hyperparams.Config):
  """Model config for YOLO: backbone, decoder, head, loss and anchors.

  Every sub-config default is produced by a `default_factory`. Python 3.11+
  dataclasses reject unhashable instances as field defaults, and a factory
  gives each `Yolo` config its own sub-config objects rather than one
  instance created at class-definition time.
  """
  input_size: Optional[List[int]] = dataclasses.field(
      default_factory=lambda: [512, 512, 3])
  backbone: backbones.Backbone = dataclasses.field(
      default_factory=lambda: backbones.Backbone(  # pylint: disable=g-long-lambda
          type='darknet',
          darknet=backbones.Darknet(model_id='cspdarknet53')))
  decoder: decoders.Decoder = dataclasses.field(
      default_factory=lambda: decoders.Decoder(  # pylint: disable=g-long-lambda
          type='yolo_decoder',
          yolo_decoder=decoders.YoloDecoder(version='v4', type='regular')))
  head: YoloHead = dataclasses.field(default_factory=YoloHead)
  detection_generator: YoloDetectionGenerator = dataclasses.field(
      default_factory=YoloDetectionGenerator)
  loss: YoloLoss = dataclasses.field(default_factory=YoloLoss)
  norm_activation: common.NormActivation = dataclasses.field(
      default_factory=lambda: common.NormActivation(  # pylint: disable=g-long-lambda
          activation='mish',
          use_sync_bn=True,
          norm_momentum=0.99,
          norm_epsilon=0.001))
  num_classes: int = 80
  anchor_boxes: AnchorBoxes = dataclasses.field(default_factory=AnchorBoxes)
  darknet_based_model: bool = False
@dataclasses.dataclass
class YoloTask(cfg.TaskConfig):
  """Task config for YOLO: model, input data and checkpoint options."""
  per_category_metrics: bool = False
  smart_bias_lr: float = 0.0
  # Sub-configs are created through factories instead of shared default
  # instances: Python 3.11+ dataclasses reject unhashable instances as field
  # defaults, and a factory gives every `YoloTask` its own objects.
  model: Yolo = dataclasses.field(default_factory=Yolo)
  train_data: DataConfig = dataclasses.field(
      default_factory=lambda: DataConfig(is_training=True))
  validation_data: DataConfig = dataclasses.field(
      default_factory=lambda: DataConfig(is_training=False))
  weight_decay: float = 0.0
  annotation_file: Optional[str] = None
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[str, List[str]] = (
      'all')  # all, backbone, and/or decoder
  gradient_clip_norm: float = 0.0
  # Deliberately left without an annotation, as in the original: dataclasses
  # therefore treat it as a plain class attribute, not an init field.
  seed = GLOBAL_SEED
# Directory joined with 'train*' / 'val*' to form the default input paths.
COCO_INPUT_PATH_BASE = 'coco'
# Example counts used to derive steps per epoch and validation steps.
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000
@exp_factory.register_config_factory('yolo')
def yolo() -> cfg.ExperimentConfig:
  """Builds the general YOLO experiment around a default `YoloTask`."""
  # Both data configs must state explicitly whether they are for training.
  data_mode_restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None',
  ]
  return cfg.ExperimentConfig(
      task=YoloTask(), restrictions=data_mode_restrictions)
@exp_factory.register_config_factory('yolo_darknet')
def yolo_darknet() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv3 and v4 (darknet-based training).

  Returns:
    An experiment config that trains for 300 epochs at a global batch size of
    64 with bfloat16 mixed precision, the `sgd_torch` optimizer, a stepwise
    learning rate dropping at epoch 240 and a 1000-step linear warmup.
  """
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  validation_interval = 5
  max_num_instances = 200

  # Nine anchors, listed in the order they are dealt out to the levels
  # (three per scale).
  anchor_sizes = [
      [12, 16], [19, 36], [40, 28],
      [36, 75], [76, 55], [72, 146],
      [142, 110], [192, 243], [459, 401],
  ]
  model = Yolo(
      darknet_based_model=True,
      norm_activation=common.NormActivation(use_sync_bn=True),
      head=YoloHead(smart_bias=True),
      loss=YoloLoss(use_scaled_loss=False, update_on_repeat=True),
      anchor_boxes=AnchorBoxes(
          anchors_per_scale=3,
          boxes=[Box(box=size) for size in anchor_sizes]))

  train_parser = Parser(
      letter_box=False,
      aug_rand_saturation=1.5,
      aug_rand_brightness=1.5,
      aug_rand_hue=0.1,
      use_tie_breaker=True,
      best_match_only=False,
      anchor_thresh=0.4,
      area_thresh=0.1,
      max_num_instances=max_num_instances,
      mosaic=Mosaic(
          mosaic_frequency=0.75,
          mixup_frequency=0.0,
          mosaic_crop_mode='crop',
          mosaic_center=0.2))
  train_data = DataConfig(
      input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
      is_training=True,
      global_batch_size=train_batch_size,
      dtype='float32',
      parser=train_parser)

  eval_parser = Parser(
      letter_box=False,
      use_tie_breaker=True,
      best_match_only=False,
      anchor_thresh=0.4,
      area_thresh=0.1,
      max_num_instances=max_num_instances)
  validation_data = DataConfig(
      input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
      is_training=False,
      global_batch_size=eval_batch_size,
      drop_remainder=True,
      dtype='float32',
      parser=eval_parser)

  task = YoloTask(
      smart_bias_lr=0.1,
      init_checkpoint='',
      init_checkpoint_modules='backbone',
      annotation_file=None,
      weight_decay=0.0,
      model=model,
      train_data=train_data,
      validation_data=validation_data)

  # Learning rates are expressed relative to a reference batch size of 64.
  batch_scale = train_batch_size / 64.0
  optimizer_settings = {
      'ema': {
          'average_decay': 0.9998,
          'trainable_weights_only': False,
          'dynamic_decay': True,
      },
      'optimizer': {
          'type': 'sgd_torch',
          'sgd_torch': {
              'momentum': 0.949,
              'momentum_start': 0.949,
              'nesterov': True,
              'warmup_steps': 1000,
              'weight_decay': 0.0005,
          },
      },
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {
              'boundaries': [240 * steps_per_epoch],
              'values': [0.00131 * batch_scale, 0.000131 * batch_scale],
          },
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              'warmup_steps': 1000,
              'warmup_learning_rate': 0,
          },
      },
  }
  trainer = cfg.TrainerConfig(
      train_steps=train_epochs * steps_per_epoch,
      validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
      validation_interval=validation_interval * steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig(optimizer_settings))

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config
@exp_factory.register_config_factory('scaled_yolo')
def scaled_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4 (scaled training).

  Returns:
    An experiment config that trains for 300 epochs at a global batch size of
    64 with bfloat16 mixed precision, the `sgd_torch` optimizer, a cosine
    learning rate schedule and a 3-epoch linear warmup.
  """
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  warmup_epochs = 3
  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  max_num_instances = 300

  total_steps = train_epochs * steps_per_epoch
  warmup_steps = steps_per_epoch * warmup_epochs

  # Nine anchors, listed in the order they are dealt out to the levels
  # (three per scale).
  anchor_sizes = [
      [12, 16], [19, 36], [40, 28],
      [36, 75], [76, 55], [72, 146],
      [142, 110], [192, 243], [459, 401],
  ]
  model = Yolo(
      darknet_based_model=False,
      norm_activation=common.NormActivation(
          activation='mish',
          use_sync_bn=True,
          norm_epsilon=0.0001,
          norm_momentum=0.97),
      head=YoloHead(smart_bias=True),
      loss=YoloLoss(use_scaled_loss=True),
      anchor_boxes=AnchorBoxes(
          anchors_per_scale=3,
          boxes=[Box(box=size) for size in anchor_sizes]))

  train_parser = Parser(
      aug_rand_saturation=0.7,
      aug_rand_brightness=0.4,
      aug_rand_hue=0.015,
      letter_box=True,
      use_tie_breaker=True,
      best_match_only=True,
      anchor_thresh=4.0,
      random_pad=False,
      area_thresh=0.1,
      max_num_instances=max_num_instances,
      mosaic=Mosaic(
          mosaic_crop_mode='scale',
          mosaic_frequency=1.0,
          mixup_frequency=0.0))
  train_data = DataConfig(
      input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
      is_training=True,
      global_batch_size=train_batch_size,
      dtype='float32',
      parser=train_parser)

  eval_parser = Parser(
      letter_box=True,
      use_tie_breaker=True,
      best_match_only=True,
      anchor_thresh=4.0,
      area_thresh=0.1,
      max_num_instances=max_num_instances)
  validation_data = DataConfig(
      input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
      is_training=False,
      global_batch_size=eval_batch_size,
      drop_remainder=True,
      dtype='float32',
      parser=eval_parser)

  task = YoloTask(
      smart_bias_lr=0.1,
      init_checkpoint_modules='',
      annotation_file=None,
      weight_decay=0.0,
      model=model,
      train_data=train_data,
      validation_data=validation_data)

  optimizer_settings = {
      'ema': {
          'average_decay': 0.9999,
          'trainable_weights_only': False,
          'dynamic_decay': True,
      },
      'optimizer': {
          'type': 'sgd_torch',
          'sgd_torch': {
              'momentum': 0.937,
              'momentum_start': 0.8,
              'nesterov': True,
              'warmup_steps': warmup_steps,
              # Expressed relative to a reference batch size of 64.
              'weight_decay': 0.0005 * train_batch_size / 64.0,
          },
      },
      'learning_rate': {
          'type': 'cosine',
          'cosine': {
              'initial_learning_rate': 0.01,
              'alpha': 0.2,
              'decay_steps': total_steps,
          },
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              'warmup_steps': warmup_steps,
              'warmup_learning_rate': 0,
          },
      },
  }
  trainer = cfg.TrainerConfig(
      train_steps=total_steps,
      validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
      validation_interval=validation_interval * steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig(optimizer_settings))

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config
official/vision/beta/projects/yolo/dataloaders/yolo_input.py
View file @
b261ebb4
...
@@ -75,11 +75,11 @@ class Parser(parser.Parser):
...
@@ -75,11 +75,11 @@ class Parser(parser.Parser):
saturation. saturation will be scaled between 1/value and value.
saturation. saturation will be scaled between 1/value and value.
aug_rand_brightness: `float` indicating the maximum scaling value for
aug_rand_brightness: `float` indicating the maximum scaling value for
brightness. brightness will be scaled between 1/value and value.
brightness. brightness will be scaled between 1/value and value.
letter_box: `boolean` indicating whether upon start of the datapipeline
letter_box: `boolean` indicating whether upon start of the data
pipeline
regardless of the preprocessing ops that are used, the aspect ratio of
regardless of the preprocessing ops that are used, the aspect ratio of
the images should be preserved.
the images should be preserved.
random_pad: `bool` indicating whether to use padding to apply random
random_pad: `bool` indicating whether to use padding to apply random
translation true for darknet yolo false for scaled yolo.
translation
,
true for darknet yolo false for scaled yolo.
random_flip: `boolean` indicating whether or not to randomly flip the
random_flip: `boolean` indicating whether or not to randomly flip the
image horizontally.
image horizontally.
jitter: `float` for the maximum change in aspect ratio expected in each
jitter: `float` for the maximum change in aspect ratio expected in each
...
@@ -147,6 +147,7 @@ class Parser(parser.Parser):
...
@@ -147,6 +147,7 @@ class Parser(parser.Parser):
# Set the per level values needed for operation
# Set the per level values needed for operation
self
.
_darknet
=
darknet
self
.
_darknet
=
darknet
self
.
_area_thresh
=
area_thresh
self
.
_area_thresh
=
area_thresh
self
.
_level_limits
=
level_limits
self
.
_seed
=
seed
self
.
_seed
=
seed
self
.
_dtype
=
dtype
self
.
_dtype
=
dtype
...
@@ -259,7 +260,7 @@ class Parser(parser.Parser):
...
@@ -259,7 +260,7 @@ class Parser(parser.Parser):
self
.
_aug_rand_saturation
,
self
.
_aug_rand_saturation
,
self
.
_aug_rand_brightness
,
self
.
_aug_rand_brightness
,
seed
=
self
.
_seed
,
seed
=
self
.
_seed
,
darknet
=
self
.
_darknet
)
darknet
=
self
.
_darknet
or
self
.
_level_limits
is
not
None
)
# Cast the image to the selected datatype.
# Cast the image to the selected datatype.
image
,
labels
=
self
.
_build_label
(
image
,
labels
=
self
.
_build_label
(
...
...
official/vision/beta/projects/yolo/losses/yolo_loss.py
View file @
b261ebb4
...
@@ -40,7 +40,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
...
@@ -40,7 +40,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
loss_type
=
'ciou'
,
loss_type
=
'ciou'
,
iou_normalizer
=
1.0
,
iou_normalizer
=
1.0
,
cls_normalizer
=
1.0
,
cls_normalizer
=
1.0
,
obj_normalizer
=
1.0
,
obj
ect
_normalizer
=
1.0
,
label_smoothing
=
0.0
,
label_smoothing
=
0.0
,
objectness_smooth
=
True
,
objectness_smooth
=
True
,
update_on_repeat
=
False
,
update_on_repeat
=
False
,
...
@@ -65,7 +65,8 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
...
@@ -65,7 +65,8 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
iou_normalizer: `float` for how much to scale the loss on the IOU or the
iou_normalizer: `float` for how much to scale the loss on the IOU or the
boxes.
boxes.
cls_normalizer: `float` for how much to scale the loss on the classes.
cls_normalizer: `float` for how much to scale the loss on the classes.
obj_normalizer: `float` for how much to scale loss on the detection map.
object_normalizer: `float` for how much to scale loss on the detection
map.
label_smoothing: `float` for how much to smooth the loss on the classes.
label_smoothing: `float` for how much to smooth the loss on the classes.
objectness_smooth: `float` for how much to smooth the loss on the
objectness_smooth: `float` for how much to smooth the loss on the
detection map.
detection map.
...
@@ -90,7 +91,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
...
@@ -90,7 +91,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
self
.
_iou_normalizer
=
iou_normalizer
self
.
_iou_normalizer
=
iou_normalizer
self
.
_cls_normalizer
=
cls_normalizer
self
.
_cls_normalizer
=
cls_normalizer
self
.
_obj_normalizer
=
obj_normalizer
self
.
_obj
ect
_normalizer
=
obj
ect
_normalizer
self
.
_scale_x_y
=
scale_x_y
self
.
_scale_x_y
=
scale_x_y
self
.
_max_delta
=
max_delta
self
.
_max_delta
=
max_delta
...
@@ -240,9 +241,14 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
...
@@ -240,9 +241,14 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
Returns:
Returns:
loss: `tf.float` scalar for the scaled loss.
loss: `tf.float` scalar for the scaled loss.
scale: `tf.float` how much the loss was scaled by.
"""
"""
del
box_loss
,
conf_loss
,
class_loss
,
ground_truths
,
predictions
del
box_loss
return
loss
del
conf_loss
del
class_loss
del
ground_truths
del
predictions
return
loss
,
tf
.
ones_like
(
loss
)
@
abc
.
abstractmethod
@
abc
.
abstractmethod
def
cross_replica_aggregation
(
self
,
loss
,
num_replicas_in_sync
):
def
cross_replica_aggregation
(
self
,
loss
,
num_replicas_in_sync
):
...
@@ -349,16 +355,16 @@ class DarknetLoss(YoloLossBase):
...
@@ -349,16 +355,16 @@ class DarknetLoss(YoloLossBase):
tf
.
cast
(
true_class
,
tf
.
int32
),
tf
.
cast
(
true_class
,
tf
.
int32
),
depth
=
tf
.
shape
(
pred_class
)[
-
1
],
depth
=
tf
.
shape
(
pred_class
)[
-
1
],
dtype
=
pred_class
.
dtype
)
dtype
=
pred_class
.
dtype
)
true_class
es
=
tf
.
stop_gradient
(
loss_utils
.
apply_mask
(
ind_mask
,
true_class
))
true_class
=
tf
.
stop_gradient
(
loss_utils
.
apply_mask
(
ind_mask
,
true_class
))
# Reorganize the one hot class list as a grid.
# Reorganize the one hot class list as a grid.
true_class
=
loss_utils
.
build_grid
(
true_class
_grid
=
loss_utils
.
build_grid
(
inds
,
true_class
es
,
pred_class
,
ind_mask
,
update
=
False
)
inds
,
true_class
,
pred_class
,
ind_mask
,
update
=
False
)
true_class
=
tf
.
stop_gradient
(
true_class
)
true_class
_grid
=
tf
.
stop_gradient
(
true_class
_grid
)
# Use the class mask to find the number of objects located in
# Use the class mask to find the number of objects located in
# each predicted grid cell/pixel.
# each predicted grid cell/pixel.
counts
=
true_class
counts
=
true_class
_grid
counts
=
tf
.
reduce_sum
(
counts
,
axis
=-
1
,
keepdims
=
True
)
counts
=
tf
.
reduce_sum
(
counts
,
axis
=-
1
,
keepdims
=
True
)
reps
=
tf
.
gather_nd
(
counts
,
inds
,
batch_dims
=
1
)
reps
=
tf
.
gather_nd
(
counts
,
inds
,
batch_dims
=
1
)
reps
=
tf
.
squeeze
(
reps
,
axis
=-
1
)
reps
=
tf
.
squeeze
(
reps
,
axis
=-
1
)
...
@@ -372,19 +378,26 @@ class DarknetLoss(YoloLossBase):
...
@@ -372,19 +378,26 @@ class DarknetLoss(YoloLossBase):
box_loss
=
math_ops
.
divide_no_nan
(
box_loss
,
reps
)
box_loss
=
math_ops
.
divide_no_nan
(
box_loss
,
reps
)
box_loss
=
tf
.
cast
(
tf
.
reduce_sum
(
box_loss
,
axis
=
1
),
dtype
=
y_pred
.
dtype
)
box_loss
=
tf
.
cast
(
tf
.
reduce_sum
(
box_loss
,
axis
=
1
),
dtype
=
y_pred
.
dtype
)
if
self
.
_update_on_repeat
:
# Converts list of ground truths into a grid where repeated values
# are replaced by the most recent value. So some class identities may
# get lost but the loss computation will be more stable. Results are
# more consistent.
# Compute the sigmoid binary cross entropy for the class maps.
# Compute the sigmoid binary cross entropy for the class maps.
class_loss
=
tf
.
reduce_mean
(
class_loss
=
tf
.
reduce_mean
(
loss_utils
.
sigmoid_bce
(
loss_utils
.
sigmoid_bce
(
tf
.
expand_dims
(
true_class
,
axis
=-
1
),
tf
.
expand_dims
(
true_class
_grid
,
axis
=-
1
),
tf
.
expand_dims
(
pred_class
,
axis
=-
1
),
self
.
_label_smoothing
),
tf
.
expand_dims
(
pred_class
,
axis
=-
1
),
self
.
_label_smoothing
),
axis
=-
1
)
axis
=-
1
)
# Apply normalization to the class losses.
# Apply normalization to the class losses.
if
self
.
_cls_normalizer
<
1.0
:
if
self
.
_cls_normalizer
<
1.0
:
# Build a mask based on the true class locations.
# Build a mask based on the true class locations.
cls_norm_mask
=
true_class
cls_norm_mask
=
true_class
_grid
# Apply the classes weight to class indexes were one_hot is one.
# Apply the classes weight to class indexes were one_hot is one.
class_loss
*=
((
1
-
cls_norm_mask
)
+
cls_norm_mask
*
self
.
_cls_normalizer
)
class_loss
*=
((
1
-
cls_norm_mask
)
+
cls_norm_mask
*
self
.
_cls_normalizer
)
# Mask to the class loss and compute the sum over all the objects.
# Mask to the class loss and compute the sum over all the objects.
class_loss
=
tf
.
reduce_sum
(
class_loss
,
axis
=-
1
)
class_loss
=
tf
.
reduce_sum
(
class_loss
,
axis
=-
1
)
...
@@ -392,6 +405,23 @@ class DarknetLoss(YoloLossBase):
...
@@ -392,6 +405,23 @@ class DarknetLoss(YoloLossBase):
class_loss
=
math_ops
.
rm_nan_inf
(
class_loss
,
val
=
0.0
)
class_loss
=
math_ops
.
rm_nan_inf
(
class_loss
,
val
=
0.0
)
class_loss
=
tf
.
cast
(
class_loss
=
tf
.
cast
(
tf
.
reduce_sum
(
class_loss
,
axis
=
(
1
,
2
,
3
)),
dtype
=
y_pred
.
dtype
)
tf
.
reduce_sum
(
class_loss
,
axis
=
(
1
,
2
,
3
)),
dtype
=
y_pred
.
dtype
)
else
:
# Computes the loss while keeping the structure as a list in
# order to ensure all objects are considered. In some cases can
# make training more unstable but may also return higher APs.
pred_class
=
loss_utils
.
apply_mask
(
ind_mask
,
tf
.
gather_nd
(
pred_class
,
inds
,
batch_dims
=
1
))
class_loss
=
tf
.
keras
.
losses
.
binary_crossentropy
(
tf
.
expand_dims
(
true_class
,
axis
=-
1
),
tf
.
expand_dims
(
pred_class
,
axis
=-
1
),
label_smoothing
=
self
.
_label_smoothing
,
from_logits
=
True
)
class_loss
=
loss_utils
.
apply_mask
(
ind_mask
,
class_loss
)
class_loss
=
math_ops
.
divide_no_nan
(
class_loss
,
tf
.
expand_dims
(
reps
,
axis
=-
1
))
class_loss
=
tf
.
cast
(
tf
.
reduce_sum
(
class_loss
,
axis
=
(
1
,
2
)),
dtype
=
y_pred
.
dtype
)
class_loss
*=
self
.
_cls_normalizer
# Compute the sigmoid binary cross entropy for the confidence maps.
# Compute the sigmoid binary cross entropy for the confidence maps.
bce
=
tf
.
reduce_mean
(
bce
=
tf
.
reduce_mean
(
...
@@ -406,7 +436,7 @@ class DarknetLoss(YoloLossBase):
...
@@ -406,7 +436,7 @@ class DarknetLoss(YoloLossBase):
# Apply the weights to each loss.
# Apply the weights to each loss.
box_loss
*=
self
.
_iou_normalizer
box_loss
*=
self
.
_iou_normalizer
conf_loss
*=
self
.
_obj_normalizer
conf_loss
*=
self
.
_obj
ect
_normalizer
# Add all the losses together then take the mean over the batches.
# Add all the losses together then take the mean over the batches.
loss
=
box_loss
+
class_loss
+
conf_loss
loss
=
box_loss
+
class_loss
+
conf_loss
...
@@ -547,7 +577,7 @@ class ScaledLoss(YoloLossBase):
...
@@ -547,7 +577,7 @@ class ScaledLoss(YoloLossBase):
# Apply the weights to each loss.
# Apply the weights to each loss.
box_loss
*=
self
.
_iou_normalizer
box_loss
*=
self
.
_iou_normalizer
class_loss
*=
self
.
_cls_normalizer
class_loss
*=
self
.
_cls_normalizer
conf_loss
*=
self
.
_obj_normalizer
conf_loss
*=
self
.
_obj
ect
_normalizer
# Add all the losses together then take the sum over the batches.
# Add all the losses together then take the sum over the batches.
mean_loss
=
box_loss
+
class_loss
+
conf_loss
mean_loss
=
box_loss
+
class_loss
+
conf_loss
...
@@ -575,12 +605,13 @@ class ScaledLoss(YoloLossBase):
...
@@ -575,12 +605,13 @@ class ScaledLoss(YoloLossBase):
predictions: `Dict` holding all the predicted values.
predictions: `Dict` holding all the predicted values.
Returns:
Returns:
loss: `tf.float` scalar for the scaled loss.
loss: `tf.float` scalar for the scaled loss.
scale: `tf.float` how much the loss was scaled by.
"""
"""
scale
=
tf
.
stop_gradient
(
3
/
len
(
list
(
predictions
.
keys
())))
scale
=
tf
.
stop_gradient
(
3
/
len
(
list
(
predictions
.
keys
())))
return
loss
*
scale
return
loss
*
scale
,
1
/
scale
def
cross_replica_aggregation
(
self
,
loss
,
num_replicas_in_sync
):
def
cross_replica_aggregation
(
self
,
loss
,
num_replicas_in_sync
):
"""
t
his method is not specific to each loss path, but each loss type."""
"""
T
his method is not specific to each loss path, but each loss type."""
return
loss
return
loss
...
@@ -597,7 +628,7 @@ class YoloLoss:
...
@@ -597,7 +628,7 @@ class YoloLoss:
loss_types
=
None
,
loss_types
=
None
,
iou_normalizers
=
None
,
iou_normalizers
=
None
,
cls_normalizers
=
None
,
cls_normalizers
=
None
,
obj_normalizers
=
None
,
obj
ect
_normalizers
=
None
,
objectness_smooths
=
None
,
objectness_smooths
=
None
,
box_types
=
None
,
box_types
=
None
,
scale_xys
=
None
,
scale_xys
=
None
,
...
@@ -627,8 +658,8 @@ class YoloLoss:
...
@@ -627,8 +658,8 @@ class YoloLoss:
or the boxes for each FPN path.
or the boxes for each FPN path.
cls_normalizers: `Dict[float]` for how much to scale the loss on the
cls_normalizers: `Dict[float]` for how much to scale the loss on the
classes for each FPN path.
classes for each FPN path.
obj_normalizers: `Dict[float]` for how much to scale loss on the
detection
obj
ect
_normalizers: `Dict[float]` for how much to scale loss on the
map for each FPN path.
detection
map for each FPN path.
objectness_smooths: `Dict[float]` for how much to smooth the loss on the
objectness_smooths: `Dict[float]` for how much to smooth the loss on the
detection map for each FPN path.
detection map for each FPN path.
box_types: `Dict[bool]` for which scaling type to use for each FPN path.
box_types: `Dict[bool]` for which scaling type to use for each FPN path.
...
@@ -666,7 +697,7 @@ class YoloLoss:
...
@@ -666,7 +697,7 @@ class YoloLoss:
loss_type
=
loss_types
[
key
],
loss_type
=
loss_types
[
key
],
iou_normalizer
=
iou_normalizers
[
key
],
iou_normalizer
=
iou_normalizers
[
key
],
cls_normalizer
=
cls_normalizers
[
key
],
cls_normalizer
=
cls_normalizers
[
key
],
obj_normalizer
=
obj_normalizers
[
key
],
obj
ect
_normalizer
=
obj
ect
_normalizers
[
key
],
box_type
=
box_types
[
key
],
box_type
=
box_types
[
key
],
objectness_smooth
=
objectness_smooths
[
key
],
objectness_smooth
=
objectness_smooths
[
key
],
max_delta
=
max_deltas
[
key
],
max_delta
=
max_deltas
[
key
],
...
@@ -695,10 +726,8 @@ class YoloLoss:
...
@@ -695,10 +726,8 @@ class YoloLoss:
# after computing the loss, scale loss as needed for aggregation
# after computing the loss, scale loss as needed for aggregation
# across FPN levels
# across FPN levels
loss
=
self
.
_loss_dict
[
key
].
post_path_aggregation
(
loss
,
loss_box
,
loss
,
scale
=
self
.
_loss_dict
[
key
].
post_path_aggregation
(
loss_conf
,
loss_class
,
loss
,
loss_box
,
loss_conf
,
loss_class
,
ground_truth
,
predictions
)
ground_truth
,
predictions
)
# after completing the scaling of the loss on each replica, handle
# after completing the scaling of the loss on each replica, handle
# scaling the loss for merging the loss across replicas
# scaling the loss for merging the loss across replicas
...
@@ -708,12 +737,13 @@ class YoloLoss:
...
@@ -708,12 +737,13 @@ class YoloLoss:
# detach all the below gradients: none of them should make a
# detach all the below gradients: none of them should make a
# contribution to the gradient from this point forwards
# contribution to the gradient from this point forwards
metric_loss
+=
tf
.
stop_gradient
(
mean_loss
)
metric_loss
+=
tf
.
stop_gradient
(
mean_loss
/
scale
)
metric_dict
[
key
][
'loss'
]
=
tf
.
stop_gradient
(
mean_loss
)
metric_dict
[
key
][
'loss'
]
=
tf
.
stop_gradient
(
mean_loss
/
scale
)
metric_dict
[
key
][
'avg_iou'
]
=
tf
.
stop_gradient
(
avg_iou
)
metric_dict
[
key
][
'avg_iou'
]
=
tf
.
stop_gradient
(
avg_iou
)
metric_dict
[
key
][
'avg_obj'
]
=
tf
.
stop_gradient
(
avg_obj
)
metric_dict
[
key
][
'avg_obj'
]
=
tf
.
stop_gradient
(
avg_obj
)
metric_dict
[
'net'
][
'box'
]
+=
tf
.
stop_gradient
(
loss_box
)
metric_dict
[
'net'
][
'box'
]
+=
tf
.
stop_gradient
(
loss_box
/
scale
)
metric_dict
[
'net'
][
'class'
]
+=
tf
.
stop_gradient
(
loss_class
)
metric_dict
[
'net'
][
'class'
]
+=
tf
.
stop_gradient
(
loss_class
/
scale
)
metric_dict
[
'net'
][
'conf'
]
+=
tf
.
stop_gradient
(
loss_conf
)
metric_dict
[
'net'
][
'conf'
]
+=
tf
.
stop_gradient
(
loss_conf
/
scale
)
return
loss_val
,
metric_loss
,
metric_dict
return
loss_val
,
metric_loss
,
metric_dict
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
View file @
b261ebb4
...
@@ -60,7 +60,7 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
...
@@ -60,7 +60,7 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
loss_types
=
{
key
:
'ciou'
for
key
in
keys
},
loss_types
=
{
key
:
'ciou'
for
key
in
keys
},
iou_normalizers
=
{
key
:
0.05
for
key
in
keys
},
iou_normalizers
=
{
key
:
0.05
for
key
in
keys
},
cls_normalizers
=
{
key
:
0.5
for
key
in
keys
},
cls_normalizers
=
{
key
:
0.5
for
key
in
keys
},
obj_normalizers
=
{
key
:
1.0
for
key
in
keys
},
obj
ect
_normalizers
=
{
key
:
1.0
for
key
in
keys
},
objectness_smooths
=
{
key
:
1.0
for
key
in
keys
},
objectness_smooths
=
{
key
:
1.0
for
key
in
keys
},
box_types
=
{
key
:
'scaled'
for
key
in
keys
},
box_types
=
{
key
:
'scaled'
for
key
in
keys
},
scale_xys
=
{
key
:
2.0
for
key
in
keys
},
scale_xys
=
{
key
:
2.0
for
key
in
keys
},
...
...
official/vision/beta/projects/yolo/modeling/backbones/darknet.py
View file @
b261ebb4
...
@@ -454,6 +454,9 @@ class Darknet(tf.keras.Model):
...
@@ -454,6 +454,9 @@ class Darknet(tf.keras.Model):
def
_build_struct
(
self
,
net
,
inputs
):
def
_build_struct
(
self
,
net
,
inputs
):
if
self
.
_use_reorg_input
:
if
self
.
_use_reorg_input
:
inputs
=
nn_blocks
.
Reorg
()(
inputs
)
inputs
=
nn_blocks
.
Reorg
()(
inputs
)
net
[
0
].
filters
=
net
[
1
].
filters
net
[
0
].
output_name
=
net
[
1
].
output_name
del
net
[
1
]
endpoints
=
collections
.
OrderedDict
()
endpoints
=
collections
.
OrderedDict
()
stack_outputs
=
[
inputs
]
stack_outputs
=
[
inputs
]
...
...
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
View file @
b261ebb4
...
@@ -13,10 +13,66 @@
...
@@ -13,10 +13,66 @@
# limitations under the License.
# limitations under the License.
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
from
typing
import
Mapping
,
Union
,
Optional
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.vision.beta.modeling.decoders
import
factory
from
official.vision.beta.projects.yolo.modeling.layers
import
nn_blocks
from
official.vision.beta.projects.yolo.modeling.layers
import
nn_blocks
# Preset model configurations.
# The table is indexed first by model version ('v3', 'v4', ...) and then by
# model config type ('regular', 'tiny', 'csp', ...). Each leaf maps option
# names to their preset values.
YOLO_MODELS = {
    'v4': {
        'regular': {
            'embed_spp': False,
            'use_fpn': True,
            'max_level_process_len': None,
            'path_process_len': 6,
        },
        'tiny': {
            'embed_spp': False,
            'use_fpn': False,
            'max_level_process_len': 2,
            'path_process_len': 1,
        },
        'csp': {
            'embed_spp': False,
            'use_fpn': True,
            'max_level_process_len': None,
            'csp_stack': 5,
            'fpn_depth': 5,
            'path_process_len': 6,
        },
        'csp_large': {
            'embed_spp': False,
            'use_fpn': True,
            'max_level_process_len': None,
            'csp_stack': 7,
            'fpn_depth': 7,
            'path_process_len': 8,
            'fpn_filter_scale': 2,
        },
    },
    'v3': {
        'regular': {
            'embed_spp': False,
            'use_fpn': False,
            'max_level_process_len': None,
            'path_process_len': 6,
        },
        'tiny': {
            'embed_spp': False,
            'use_fpn': False,
            'max_level_process_len': 2,
            'path_process_len': 1,
        },
        'spp': {
            'embed_spp': True,
            'use_fpn': False,
            'max_level_process_len': 2,
            'path_process_len': 1,
        },
    },
}
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'yolo'
)
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'yolo'
)
class
_IdentityRoute
(
tf
.
keras
.
layers
.
Layer
):
class
_IdentityRoute
(
tf
.
keras
.
layers
.
Layer
):
...
@@ -487,3 +543,66 @@ class YoloDecoder(tf.keras.Model):
...
@@ -487,3 +543,66 @@ class YoloDecoder(tf.keras.Model):
@
classmethod
@
classmethod
def
from_config
(
cls
,
config
,
custom_objects
=
None
):
def
from_config
(
cls
,
config
,
custom_objects
=
None
):
return
cls
(
**
config
)
return
cls
(
**
config
)
@factory.register_decoder_builder('yolo_decoder')
def build_yolo_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    **kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:
  """Builds Yolo FPN/PAN decoder from a config.

  The decoder is assembled from one of the presets in `YOLO_MODELS`, selected
  by `decoder_cfg.version` and `decoder_cfg.type`. Any preset key whose value
  in the decoder config is not None is overridden by the config value.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.
    **kwargs: Additional kwargs arguments.

  Returns:
    A `tf.keras.Model` instance of the Yolo FPN/PAN decoder.

  Raises:
    ValueError: If the decoder version is None or not a key of `YOLO_MODELS`,
      or if the decoder type is not defined for the selected version.
  """
  decoder_cfg = model_config.decoder.get()
  norm_activation_config = model_config.norm_activation

  # 'same' means: reuse the activation of the shared norm/activation config.
  activation = (
      decoder_cfg.activation if decoder_cfg.activation != 'same' else
      norm_activation_config.activation)

  if decoder_cfg.version is None:  # custom yolo
    raise ValueError('Decoder version cannot be None, specify v3 or v4.')

  if decoder_cfg.version not in YOLO_MODELS:
    raise ValueError(
        'Unsupported model version please select from {v3, v4}, '
        'or specify a custom decoder config using YoloDecoder in you yaml')

  if decoder_cfg.type is None:
    decoder_cfg.type = 'regular'

  if decoder_cfg.type not in YOLO_MODELS[decoder_cfg.version]:
    # The list of valid types is interpolated into the message; previously
    # the placeholder was emitted verbatim because the literal was not an
    # f-string.
    raise ValueError(
        'Unsupported model type please select from '
        f'{list(YOLO_MODELS[decoder_cfg.version].keys())} '
        'or specify a custom decoder config using YoloDecoder.')

  # Work on a copy so that neither config overrides nor the per-call options
  # below are written back into the module-level preset table.
  base_model = dict(YOLO_MODELS[decoder_cfg.version][decoder_cfg.type])

  cfg_dict = decoder_cfg.as_dict()
  for key in base_model:
    if cfg_dict[key] is not None:
      base_model[key] = cfg_dict[key]

  base_dict = dict(
      activation=activation,
      use_spatial_attention=decoder_cfg.use_spatial_attention,
      use_separable_conv=decoder_cfg.use_separable_conv,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  base_model.update(base_dict)
  model = YoloDecoder(input_specs, **base_model, **kwargs)
  return model
official/vision/beta/projects/yolo/modeling/factory.py
0 → 100644
View file @
b261ebb4
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common factory functions yolo neural networks."""
from
absl
import
logging
from
official.vision.beta.modeling.backbones
import
factory
as
backbone_factory
from
official.vision.beta.modeling.decoders
import
factory
as
decoder_factory
from
official.vision.beta.projects.yolo.configs
import
yolo
from
official.vision.beta.projects.yolo.modeling
import
yolo_model
from
official.vision.beta.projects.yolo.modeling.heads
import
yolo_head
from
official.vision.beta.projects.yolo.modeling.layers
import
detection_generator
def build_yolo_detection_generator(model_config: yolo.Yolo, anchor_boxes):
  """Creates the detection generator layer described by `model_config`.

  Args:
    model_config: `yolo.Yolo` model config. Its `num_classes` field and its
      `detection_generator` and `loss` sections are read.
    anchor_boxes: Anchors, forwarded unchanged as the `anchors` argument.

  Returns:
    The constructed `detection_generator.YoloLayer`.
  """
  generator_cfg = model_config.detection_generator
  loss_cfg = model_config.loss

  layer_kwargs = dict(
      classes=model_config.num_classes,
      anchors=anchor_boxes,
      # Options taken from the detection generator section.
      iou_thresh=generator_cfg.iou_thresh,
      nms_thresh=generator_cfg.nms_thresh,
      max_boxes=generator_cfg.max_boxes,
      pre_nms_points=generator_cfg.pre_nms_points,
      nms_type=generator_cfg.nms_type,
      box_type=generator_cfg.box_type.get(),
      path_scale=generator_cfg.path_scales.get(),
      scale_xy=generator_cfg.scale_xy.get(),
      # Options taken from the loss section.
      label_smoothing=loss_cfg.label_smoothing,
      use_scaled_loss=loss_cfg.use_scaled_loss,
      update_on_repeat=loss_cfg.update_on_repeat,
      truth_thresh=loss_cfg.truth_thresh.get(),
      loss_type=loss_cfg.box_loss_type.get(),
      max_delta=loss_cfg.max_delta.get(),
      iou_normalizer=loss_cfg.iou_normalizer.get(),
      cls_normalizer=loss_cfg.cls_normalizer.get(),
      object_normalizer=loss_cfg.object_normalizer.get(),
      ignore_thresh=loss_cfg.ignore_thresh.get(),
      objectness_smooth=loss_cfg.objectness_smooth.get())
  return detection_generator.YoloLayer(**layer_kwargs)
def build_yolo_head(input_specs, model_config: yolo.Yolo, l2_regularization):
  """Creates the YOLO head for the levels present in `input_specs`.

  Args:
    input_specs: Mapping whose keys are level identifiers convertible to
      `int`; the smallest and largest become the head's level range.
    model_config: `yolo.Yolo` model config.
    l2_regularization: Passed to the head as its `kernel_regularizer`.

  Returns:
    The constructed `yolo_head.YoloHead`.
  """
  levels = [int(level) for level in input_specs.keys()]
  norm_cfg = model_config.norm_activation
  return yolo_head.YoloHead(
      min_level=min(levels),
      max_level=max(levels),
      classes=model_config.num_classes,
      boxes_per_level=model_config.anchor_boxes.anchors_per_scale,
      norm_momentum=norm_cfg.norm_momentum,
      norm_epsilon=norm_cfg.norm_epsilon,
      kernel_regularizer=l2_regularization,
      smart_bias=model_config.head.smart_bias)
def build_yolo(input_specs, model_config, l2_regularization):
  """Assembles the full YOLO model and its losses.

  Builds the backbone, decoder, head and detection generator from
  `model_config`, wires them into a `yolo_model.Yolo`, builds the model for
  `input_specs.shape` and logs its summary.

  Args:
    input_specs: Input specification; its `shape` is used to build the model.
    model_config: Model config describing every component.
    l2_regularization: Regularizer handed to the backbone, decoder and head
      builders.

  Returns:
    A `(model, losses)` tuple, where `losses` is whatever the detection
    generator's `get_losses()` returns.
  """
  backbone_cfg = model_config.backbone.get()
  anchor_dict, _ = model_config.anchor_boxes.get(
      backbone_cfg.min_level, backbone_cfg.max_level)

  backbone = backbone_factory.build_backbone(
      input_specs, model_config.backbone, model_config.norm_activation,
      l2_regularization)
  decoder = decoder_factory.build_decoder(
      backbone.output_specs, model_config, l2_regularization)
  head = build_yolo_head(
      decoder.output_specs, model_config, l2_regularization)
  generator = build_yolo_detection_generator(model_config, anchor_dict)

  model = yolo_model.Yolo(
      backbone=backbone,
      decoder=decoder,
      head=head,
      detection_generator=generator)
  model.build(input_specs.shape)
  model.summary(print_fn=logging.info)

  return model, generator.get_losses()
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py
View file @
b261ebb4
...
@@ -36,7 +36,7 @@ class YoloLayer(tf.keras.Model):
...
@@ -36,7 +36,7 @@ class YoloLayer(tf.keras.Model):
loss_type
=
'ciou'
,
loss_type
=
'ciou'
,
iou_normalizer
=
1.0
,
iou_normalizer
=
1.0
,
cls_normalizer
=
1.0
,
cls_normalizer
=
1.0
,
obj_normalizer
=
1.0
,
obj
ect
_normalizer
=
1.0
,
use_scaled_loss
=
False
,
use_scaled_loss
=
False
,
update_on_repeat
=
False
,
update_on_repeat
=
False
,
pre_nms_points
=
5000
,
pre_nms_points
=
5000
,
...
@@ -67,7 +67,8 @@ class YoloLayer(tf.keras.Model):
...
@@ -67,7 +67,8 @@ class YoloLayer(tf.keras.Model):
iou_normalizer: `float` for how much to scale the loss on the IOU or the
iou_normalizer: `float` for how much to scale the loss on the IOU or the
boxes.
boxes.
cls_normalizer: `float` for how much to scale the loss on the classes.
cls_normalizer: `float` for how much to scale the loss on the classes.
obj_normalizer: `float` for how much to scale loss on the detection map.
object_normalizer: `float` for how much to scale loss on the detection
map.
use_scaled_loss: `bool` for whether to use the scaled loss
use_scaled_loss: `bool` for whether to use the scaled loss
or the traditional loss.
or the traditional loss.
update_on_repeat: `bool` indicating how you would like to handle repeated
update_on_repeat: `bool` indicating how you would like to handle repeated
...
@@ -110,7 +111,7 @@ class YoloLayer(tf.keras.Model):
...
@@ -110,7 +111,7 @@ class YoloLayer(tf.keras.Model):
self
.
_truth_thresh
=
truth_thresh
self
.
_truth_thresh
=
truth_thresh
self
.
_iou_normalizer
=
iou_normalizer
self
.
_iou_normalizer
=
iou_normalizer
self
.
_cls_normalizer
=
cls_normalizer
self
.
_cls_normalizer
=
cls_normalizer
self
.
_obj_normalizer
=
obj_normalizer
self
.
_obj
ect
_normalizer
=
obj
ect
_normalizer
self
.
_objectness_smooth
=
objectness_smooth
self
.
_objectness_smooth
=
objectness_smooth
self
.
_nms_thresh
=
nms_thresh
self
.
_nms_thresh
=
nms_thresh
self
.
_max_boxes
=
max_boxes
self
.
_max_boxes
=
max_boxes
...
@@ -289,7 +290,7 @@ class YoloLayer(tf.keras.Model):
...
@@ -289,7 +290,7 @@ class YoloLayer(tf.keras.Model):
loss_types
=
self
.
_loss_type
,
loss_types
=
self
.
_loss_type
,
iou_normalizers
=
self
.
_iou_normalizer
,
iou_normalizers
=
self
.
_iou_normalizer
,
cls_normalizers
=
self
.
_cls_normalizer
,
cls_normalizers
=
self
.
_cls_normalizer
,
obj_normalizers
=
self
.
_obj_normalizer
,
obj
ect
_normalizers
=
self
.
_obj
ect
_normalizer
,
objectness_smooths
=
self
.
_objectness_smooth
,
objectness_smooths
=
self
.
_objectness_smooth
,
box_types
=
self
.
_box_type
,
box_types
=
self
.
_box_type
,
max_deltas
=
self
.
_max_delta
,
max_deltas
=
self
.
_max_delta
,
...
...
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
View file @
b261ebb4
...
@@ -14,7 +14,9 @@
...
@@ -14,7 +14,9 @@
"""Contains common building blocks for yolo neural networks."""
"""Contains common building blocks for yolo neural networks."""
from
typing
import
Callable
,
List
,
Tuple
from
typing
import
Callable
,
List
,
Tuple
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.modeling
import
tf_utils
from
official.vision.beta.ops
import
spatial_transform_ops
from
official.vision.beta.ops
import
spatial_transform_ops
...
@@ -141,6 +143,7 @@ class ConvBN(tf.keras.layers.Layer):
...
@@ -141,6 +143,7 @@ class ConvBN(tf.keras.layers.Layer):
# activation params
# activation params
self
.
_activation
=
activation
self
.
_activation
=
activation
self
.
_leaky_alpha
=
leaky_alpha
self
.
_leaky_alpha
=
leaky_alpha
self
.
_fuse
=
False
super
().
__init__
(
**
kwargs
)
super
().
__init__
(
**
kwargs
)
...
@@ -164,6 +167,8 @@ class ConvBN(tf.keras.layers.Layer):
...
@@ -164,6 +167,8 @@ class ConvBN(tf.keras.layers.Layer):
momentum
=
self
.
_norm_momentum
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
,
epsilon
=
self
.
_norm_epsilon
,
axis
=
self
.
_bn_axis
)
axis
=
self
.
_bn_axis
)
else
:
self
.
bn
=
None
if
self
.
_activation
==
'leaky'
:
if
self
.
_activation
==
'leaky'
:
self
.
_activation_fn
=
tf
.
keras
.
layers
.
LeakyReLU
(
alpha
=
self
.
_leaky_alpha
)
self
.
_activation_fn
=
tf
.
keras
.
layers
.
LeakyReLU
(
alpha
=
self
.
_leaky_alpha
)
...
@@ -174,11 +179,44 @@ class ConvBN(tf.keras.layers.Layer):
...
@@ -174,11 +179,44 @@ class ConvBN(tf.keras.layers.Layer):
def
call
(
self
,
x
):
def
call
(
self
,
x
):
x
=
self
.
conv
(
x
)
x
=
self
.
conv
(
x
)
if
self
.
_use_bn
:
if
self
.
_use_bn
and
not
self
.
_fuse
:
x
=
self
.
bn
(
x
)
x
=
self
.
bn
(
x
)
x
=
self
.
_activation_fn
(
x
)
x
=
self
.
_activation_fn
(
x
)
return
x
return
x
def fuse(self):
  """Folds the batch norm layer into the convolution, at most once.

  Does nothing when there is no batch norm layer, when separable convolutions
  are in use, or when the layer has already been fused. Otherwise the
  convolution kernel is rescaled per output filter by
  `gamma / sqrt(epsilon + moving_variance)`, a bias of
  `beta - gamma * moving_mean / sqrt(epsilon + moving_variance)` is installed
  on the convolution, the batch norm layer is dropped, and both this layer
  and its convolution are marked non-trainable.
  """
  if self.bn is None or self._use_separable_conv:
    return
  if self._fuse:
    return

  # Fuse convolution and batchnorm, gives me +2 to 3 FPS 2ms latency.
  # layers: https://tehnokv.com/posts/fusing-batchnorm-and-conv/
  self._fuse = True
  kernel = self.conv.get_weights()[0]
  gamma, beta, moving_mean, moving_variance = self.bn.get_weights()

  # Rebuild the convolution with a bias so the batch norm shift has a home.
  self.conv.use_bias = True
  in_filters = kernel.shape[-2]
  self.conv.build([None, None, None, in_filters])

  std = tf.sqrt(self._norm_epsilon + moving_variance)

  # Move the output-filter axis first and flatten the rest, so the per-filter
  # scale can be applied as a diagonal matrix product.
  kernel_t = tf.transpose(kernel, perm=(3, 2, 0, 1))
  kernel_flat = tf.reshape(kernel_t, [kernel.shape[-1], -1])
  scale = tf.linalg.diag(gamma / std)
  fused_kernel = tf.reshape(
      tf.matmul(scale, kernel_flat), kernel_t.get_shape())
  # Undo the transpose to restore the original kernel layout.
  fused_kernel = tf.transpose(fused_kernel, perm=(2, 3, 1, 0))

  fused_bias = beta - gamma * moving_mean / std

  self.conv.set_weights([fused_kernel, fused_bias])
  del self.bn

  self.trainable = False
  self.conv.trainable = False
  self.bn = None
  return
def
get_config
(
self
):
def
get_config
(
self
):
# used to store/share parameters to reconstruct the model
# used to store/share parameters to reconstruct the model
layer_config
=
{
layer_config
=
{
...
...
official/vision/beta/projects/yolo/modeling/yolo_model.py
View file @
b261ebb4
...
@@ -14,72 +14,19 @@
...
@@ -14,72 +14,19 @@
"""Yolo models."""
"""Yolo models."""
from
typing
import
Mapping
,
Union
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.vision.beta.projects.yolo.modeling.layers
import
nn_blocks
# static base Yolo Models that do not require configuration
# similar to a backbone model id.
# this is done greatly simplify the model config
# the structure is as follows. model version, {v3, v4, v#, ... etc}
# the model config type {regular, tiny, small, large, ... etc}
YOLO_MODELS
=
{
"v4"
:
dict
(
regular
=
dict
(
embed_spp
=
False
,
use_fpn
=
True
,
max_level_process_len
=
None
,
path_process_len
=
6
),
tiny
=
dict
(
embed_spp
=
False
,
use_fpn
=
False
,
max_level_process_len
=
2
,
path_process_len
=
1
),
csp
=
dict
(
embed_spp
=
False
,
use_fpn
=
True
,
max_level_process_len
=
None
,
csp_stack
=
5
,
fpn_depth
=
5
,
path_process_len
=
6
),
csp_large
=
dict
(
embed_spp
=
False
,
use_fpn
=
True
,
max_level_process_len
=
None
,
csp_stack
=
7
,
fpn_depth
=
7
,
path_process_len
=
8
,
fpn_filter_scale
=
2
),
),
"v3"
:
dict
(
regular
=
dict
(
embed_spp
=
False
,
use_fpn
=
False
,
max_level_process_len
=
None
,
path_process_len
=
6
),
tiny
=
dict
(
embed_spp
=
False
,
use_fpn
=
False
,
max_level_process_len
=
2
,
path_process_len
=
1
),
spp
=
dict
(
embed_spp
=
True
,
use_fpn
=
False
,
max_level_process_len
=
2
,
path_process_len
=
1
),
),
}
class
Yolo
(
tf
.
keras
.
Model
):
class
Yolo
(
tf
.
keras
.
Model
):
"""The YOLO model class."""
"""The YOLO model class."""
def
__init__
(
self
,
def
__init__
(
self
,
backbone
=
None
,
backbone
,
decoder
=
None
,
decoder
,
head
=
None
,
head
,
detection_generator
=
None
,
detection_generator
,
**
kwargs
):
**
kwargs
):
"""Detection initialization function.
"""Detection initialization function.
...
@@ -93,10 +40,10 @@ class Yolo(tf.keras.Model):
...
@@ -93,10 +40,10 @@ class Yolo(tf.keras.Model):
super
(
Yolo
,
self
).
__init__
(
**
kwargs
)
super
(
Yolo
,
self
).
__init__
(
**
kwargs
)
self
.
_config_dict
=
{
self
.
_config_dict
=
{
"
backbone
"
:
backbone
,
'
backbone
'
:
backbone
,
"
decoder
"
:
decoder
,
'
decoder
'
:
decoder
,
"
head
"
:
head
,
'
head
'
:
head
,
"filter"
:
detection_generator
'detection_generator'
:
detection_generator
}
}
# model components
# model components
...
@@ -104,18 +51,19 @@ class Yolo(tf.keras.Model):
...
@@ -104,18 +51,19 @@ class Yolo(tf.keras.Model):
self
.
_decoder
=
decoder
self
.
_decoder
=
decoder
self
.
_head
=
head
self
.
_head
=
head
self
.
_detection_generator
=
detection_generator
self
.
_detection_generator
=
detection_generator
self
.
_fused
=
False
return
return
def
call
(
self
,
inputs
,
training
=
False
):
def
call
(
self
,
inputs
,
training
=
False
):
maps
=
self
.
_
backbone
(
inputs
)
maps
=
self
.
backbone
(
inputs
)
decoded_maps
=
self
.
_
decoder
(
maps
)
decoded_maps
=
self
.
decoder
(
maps
)
raw_predictions
=
self
.
_
head
(
decoded_maps
)
raw_predictions
=
self
.
head
(
decoded_maps
)
if
training
:
if
training
:
return
{
"
raw_output
"
:
raw_predictions
}
return
{
'
raw_output
'
:
raw_predictions
}
else
:
else
:
# Post-processing.
# Post-processing.
predictions
=
self
.
_
detection_generator
(
raw_predictions
)
predictions
=
self
.
detection_generator
(
raw_predictions
)
predictions
.
update
({
"
raw_output
"
:
raw_predictions
})
predictions
.
update
({
'
raw_output
'
:
raw_predictions
})
return
predictions
return
predictions
@
property
@
property
...
@@ -141,28 +89,22 @@ class Yolo(tf.keras.Model):
...
@@ -141,28 +89,22 @@ class Yolo(tf.keras.Model):
def
from_config
(
cls
,
config
):
def
from_config
(
cls
,
config
):
return
cls
(
**
config
)
return
cls
(
**
config
)
def
get_weight_groups
(
self
,
train_vars
):
@
property
"""Sort the list of trainable variables into groups for optimization.
def
checkpoint_items
(
self
)
->
Mapping
[
str
,
Union
[
tf
.
keras
.
Model
,
tf
.
keras
.
layers
.
Layer
]]:
Args:
"""Returns a dictionary of items to be additionally checkpointed."""
train_vars: a list of tf.Variables that need to get sorted into their
items
=
dict
(
backbone
=
self
.
backbone
,
head
=
self
.
head
)
respective groups.
if
self
.
decoder
is
not
None
:
items
.
update
(
decoder
=
self
.
decoder
)
Returns:
return
items
weights: a list of tf.Variables for the weights.
bias: a list of tf.Variables for the bias.
def
fuse
(
self
):
other: a list of tf.Variables for the other operations.
"""Fuses all Convolution and Batchnorm layers to get better latency."""
"""
print
(
'Fusing Conv Batch Norm Layers.'
)
bias
=
[]
if
not
self
.
_fused
:
weights
=
[]
self
.
_fused
=
True
other
=
[]
for
layer
in
self
.
submodules
:
for
var
in
train_vars
:
if
isinstance
(
layer
,
nn_blocks
.
ConvBN
):
if
"bias"
in
var
.
name
:
layer
.
fuse
()
bias
.
append
(
var
)
self
.
summary
()
elif
"beta"
in
var
.
name
:
return
bias
.
append
(
var
)
elif
"kernel"
in
var
.
name
or
"weight"
in
var
.
name
:
weights
.
append
(
var
)
else
:
other
.
append
(
var
)
return
weights
,
bias
,
other
official/vision/beta/projects/yolo/ops/mosaic.py
View file @
b261ebb4
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
"""Mosaic op."""
"""Mosaic op."""
import
random
import
random
import
tensorflow
as
tf
import
tensorflow
as
tf
import
tensorflow_addons
as
tfa
import
tensorflow_addons
as
tfa
...
@@ -55,7 +54,7 @@ class Mosaic:
...
@@ -55,7 +54,7 @@ class Mosaic:
the images should be preserved.
the images should be preserved.
jitter: `float` for the maximum change in aspect ratio expected in each
jitter: `float` for the maximum change in aspect ratio expected in each
preprocessing step.
preprocessing step.
mosaic_crop_mode: `str` the
y
type of mosaic to apply. The options are
mosaic_crop_mode: `str` the type of mosaic to apply. The options are
{crop, scale, None}, crop will construct a mosaic by slicing images
{crop, scale, None}, crop will construct a mosaic by slicing images
togther, scale will create a mosaic by concatnating and shifting the
togther, scale will create a mosaic by concatnating and shifting the
image, and None will default to scale and apply no post processing to
image, and None will default to scale and apply no post processing to
...
@@ -325,6 +324,12 @@ class Mosaic:
...
@@ -325,6 +324,12 @@ class Mosaic:
else
:
else
:
return
self
.
_add_param
(
noop
)
return
self
.
_add_param
(
noop
)
def
_beta
(
self
,
alpha
,
beta
):
"""Generates a random number using the beta distribution."""
a
=
tf
.
random
.
gamma
([],
alpha
)
b
=
tf
.
random
.
gamma
([],
beta
)
return
b
/
(
a
+
b
)
def
_mixup
(
self
,
one
,
two
):
def
_mixup
(
self
,
one
,
two
):
"""Blend together 2 images for the mixup data augmentation."""
"""Blend together 2 images for the mixup data augmentation."""
if
self
.
_mixup_frequency
>=
1.0
:
if
self
.
_mixup_frequency
>=
1.0
:
...
@@ -337,8 +342,8 @@ class Mosaic:
...
@@ -337,8 +342,8 @@ class Mosaic:
if
domo
>=
(
1
-
self
.
_mixup_frequency
):
if
domo
>=
(
1
-
self
.
_mixup_frequency
):
sample
=
one
sample
=
one
otype
=
one
[
'image'
].
dtype
otype
=
one
[
'image'
].
dtype
r
=
preprocessing_ops
.
random_uniform_strong
(
0.4
,
0.6
,
tf
.
float32
,
seed
=
self
.
_seed
)
r
=
self
.
_beta
(
8.0
,
8.0
)
sample
[
'image'
]
=
(
sample
[
'image'
]
=
(
r
*
tf
.
cast
(
one
[
'image'
],
tf
.
float32
)
+
r
*
tf
.
cast
(
one
[
'image'
],
tf
.
float32
)
+
(
1
-
r
)
*
tf
.
cast
(
two
[
'image'
],
tf
.
float32
))
(
1
-
r
)
*
tf
.
cast
(
two
[
'image'
],
tf
.
float32
))
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment