ModelZoo / ResNet50_tensorflow / Commits

Commit 6a55ecde, authored Oct 11, 2021 by A. Unique TensorFlower

Merge pull request #10286 from PurdueDualityLab:task_pr

PiperOrigin-RevId: 402338060

Parents: 2d353306, 379d64c5

Changes: 30 files in total.
Showing 20 changed files on this page, with 1177 additions and 156 deletions (+1177 / -156).
official/vision/beta/projects/yolo/README.md (+6 / -2)
official/vision/beta/projects/yolo/common/registry_imports.py (+15 / -0)
official/vision/beta/projects/yolo/configs/decoders.py (+46 / -0)
official/vision/beta/projects/yolo/configs/experiments/darknet/csp_darknet53.yaml (+0 / -0)
official/vision/beta/projects/yolo/configs/experiments/darknet/csp_darknet53_tfds.yaml (+0 / -0)
official/vision/beta/projects/yolo/configs/experiments/darknet/darknet53.yaml (+0 / -0)
official/vision/beta/projects/yolo/configs/experiments/darknet/darknet53_tfds.yaml (+0 / -0)
official/vision/beta/projects/yolo/configs/experiments/yolov4-csp/tpu/640.yaml (+74 / -0)
official/vision/beta/projects/yolo/configs/experiments/yolov4/tpu/512.yaml (+138 / -0)
official/vision/beta/projects/yolo/configs/yolo.py (+510 / -0)
official/vision/beta/projects/yolo/dataloaders/yolo_input.py (+4 / -3)
official/vision/beta/projects/yolo/losses/yolo_loss.py (+77 / -47)
official/vision/beta/projects/yolo/losses/yolo_loss_test.py (+1 / -1)
official/vision/beta/projects/yolo/modeling/backbones/darknet.py (+3 / -0)
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py (+119 / -0)
official/vision/beta/projects/yolo/modeling/factory.py (+95 / -0)
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py (+5 / -4)
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py (+39 / -1)
official/vision/beta/projects/yolo/modeling/yolo_model.py (+36 / -94)
official/vision/beta/projects/yolo/ops/mosaic.py (+9 / -4)
official/vision/beta/projects/yolo/README.md (view file @ 6a55ecde)

@@ -73,10 +73,14 @@ connected to a new, more powerful backbone if a person chose to.
 | Yolo-v3 spp |
 | Yolo-v4 |
 | Yolo-v4 tiny |
+| Yolo-v4 csp |
+| Yolo-v4 large |

 ## Models Zoo

-## Requirements
-[…](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
+## Requirements
+[…](https://github.com/tensorflow/tensorflow/releases/tag/v2.6.0)
+[…](https://www.python.org/downloads/release/python-380/)
official/vision/beta/projects/yolo/common/registry_imports.py (view file @ 6a55ecde)

@@ -15,7 +15,22 @@
"""All necessary imports for registration."""

# pylint: disable=unused-import
# pylint: disable=g-bad-import-order
from official.common import registry_imports

# import configs
from official.vision.beta.projects.yolo.configs import darknet_classification
from official.vision.beta.projects.yolo.configs import yolo as yolo_config

# import modeling components
from official.vision.beta.projects.yolo.modeling.backbones import darknet
from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder

# import tasks
from official.vision.beta.projects.yolo.tasks import image_classification
from official.vision.beta.projects.yolo.tasks import yolo as yolo_task

# import optimization packages
from official.vision.beta.projects.yolo.optimization import optimizer_factory
from official.vision.beta.projects.yolo.optimization.configs import optimizer_config
from official.vision.beta.projects.yolo.optimization.configs import optimization_config
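
A quick sketch, not part of this commit, of how these registrations are consumed: once this module has been imported, the experiments defined in configs/yolo.py further down can be looked up by name through the experiment factory. This assumes the model-garden `official` package from this tree is importable.

import_example.py (illustrative only):

from official.core import exp_factory
from official.vision.beta.projects.yolo.common import registry_imports  # pylint: disable=unused-import

# 'yolo', 'yolo_darknet' and 'scaled_yolo' are registered in configs/yolo.py below.
experiment_config = exp_factory.get_exp_config('scaled_yolo')
print(experiment_config.task.model.num_classes)  # 80 by default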
official/vision/beta/projects/yolo/configs/decoders.py (new file, 0 → 100755; view file @ 6a55ecde)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Decoders configurations."""
import dataclasses
from typing import Optional

from official.modeling import hyperparams
from official.vision.beta.configs import decoders


@dataclasses.dataclass
class YoloDecoder(hyperparams.Config):
  """Builds Yolo decoder.

  If the name is specified, or version is specified we ignore input parameters
  and use version and name defaults.
  """
  version: Optional[str] = None
  type: Optional[str] = None
  use_fpn: Optional[bool] = None
  use_spatial_attention: bool = False
  use_separable_conv: bool = False
  csp_stack: Optional[bool] = None
  fpn_depth: Optional[int] = None
  fpn_filter_scale: Optional[int] = None
  path_process_len: Optional[int] = None
  max_level_process_len: Optional[int] = None
  embed_spp: Optional[bool] = None
  activation: Optional[str] = 'same'


@dataclasses.dataclass
class Decoder(decoders.Decoder):
  type: Optional[str] = 'yolo_decoder'
  yolo_decoder: YoloDecoder = YoloDecoder()
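
For illustration only (not part of the diff): these hyperparams dataclasses are normally instantiated and overridden directly in Python; Optional fields left at None are filled later from the version/type defaults in yolo_decoder.py. A minimal sketch:

from official.vision.beta.projects.yolo.configs import decoders as yolo_decoders

# Build a decoder config and override a couple of fields.
decoder_cfg = yolo_decoders.Decoder(
    type='yolo_decoder',
    yolo_decoder=yolo_decoders.YoloDecoder(version='v4', type='csp'))

# Unset optional fields stay None and later fall back to the
# version/type defaults in YOLO_MODELS (see yolo_decoder.py below).
assert decoder_cfg.yolo_decoder.fpn_depth is None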
official/vision/beta/projects/yolo/configs/experiments/csp_darknet53.yaml → official/vision/beta/projects/yolo/configs/experiments/darknet/csp_darknet53.yaml (view file @ 6a55ecde): file moved.

official/vision/beta/projects/yolo/configs/experiments/csp_darknet53_tfds.yaml → official/vision/beta/projects/yolo/configs/experiments/darknet/csp_darknet53_tfds.yaml (view file @ 6a55ecde): file moved.

official/vision/beta/projects/yolo/configs/experiments/darknet53.yaml → official/vision/beta/projects/yolo/configs/experiments/darknet/darknet53.yaml (view file @ 6a55ecde): file moved.

official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml → official/vision/beta/projects/yolo/configs/experiments/darknet/darknet53_tfds.yaml (view file @ 6a55ecde): file moved.
official/vision/beta/projects/yolo/configs/experiments/yolov4-csp/tpu/640.yaml (new file, 0 → 100644; view file @ 6a55ecde)

# --experiment_type=scaled_yolo
# mAP 47.6
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
task:
  model:
    input_size: [640, 640, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'altered_cspdarknet53'
        max_level: 5
        min_level: 3
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.60
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.3
      object_normalizer:
        '5': 0.28
        '4': 0.70
        '3': 2.80
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 3
      boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
              box: [36, 75], box: [76, 55], box: [72, 146],
              box: [142, 110], box: [192, 243], box: [459, 401]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.0
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.25
        aug_scale_min: 0.1
        aug_scale_max: 1.9
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.1
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
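
An illustrative aside, not part of the commit: the nine anchors listed above are sliced three per output level (levels 3 to 5) by AnchorBoxes.get in configs/yolo.py further down. A standalone sketch of that slicing:

# Distributing 9 anchors across levels 3..5, three per scale,
# mirroring AnchorBoxes.get in configs/yolo.py below.
boxes = [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55],
         [72, 146], [142, 110], [192, 243], [459, 401]]
anchors_per_scale, min_level, max_level = 3, 3, 5

anchors_per_level = {}
start = 0
for level in range(min_level, max_level + 1):
  anchors_per_level[str(level)] = boxes[start:start + anchors_per_scale]
  start += anchors_per_scale

print(anchors_per_level['3'])  # [[12, 16], [19, 36], [40, 28]], the smallest anchors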
official/vision/beta/projects/yolo/configs/experiments/yolov4/tpu/512.yaml (new file, 0 → 100755; view file @ 6a55ecde)

# --experiment_type=yolo_darknet
# mAP 43.0
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  smart_bias_lr: 0.0
  model:
    darknet_based_model: true
    input_size: [512, 512, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'cspdarknet53'
        max_level: 5
        min_level: 3
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: regular
        activation: leaky
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': original
      scale_xy:
        '5': 1.05
        '4': 1.1
        '3': 1.2
      max_boxes: 200
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.60
    loss:
      use_scaled_loss: false
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.7
      iou_normalizer:
        'all': 0.07
      cls_normalizer:
        'all': 1.0
      object_normalizer:
        'all': 1.0
      objectness_smooth:
        'all': 0.0
      max_delta:
        'all': 5.0
    norm_activation:
      activation: mish
      norm_epsilon: 0.0001
      norm_momentum: 0.99
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 3
      boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
              box: [36, 75], box: [76, 55], box: [72, 146],
              box: [142, 110], box: [192, 243], box: [459, 401]]
  train_data:
    global_batch_size: 64
    dtype: float32
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    is_training: true
    drop_remainder: true
    seed: 1000
    parser:
      mosaic:
        mosaic_frequency: 0.75
        mixup_frequency: 0.0
        mosaic_crop_mode: 'crop'
        mosaic_center: 0.2
        aug_scale_min: 0.2
        aug_scale_max: 1.6
        jitter: 0.3
      max_num_instances: 200
      letter_box: false
      random_flip: true
      aug_rand_saturation: 1.5
      aug_rand_brightness: 1.5
      aug_rand_hue: 0.1
      aug_scale_min: 0.1
      aug_scale_max: 1.9
      aug_rand_translate: 0.0
      jitter: 0.3
      area_thresh: 0.1
      random_pad: true
      use_tie_breaker: true
      anchor_thresh: 0.4
  validation_data:
    global_batch_size: 8
    dtype: float32
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
    is_training: false
    drop_remainder: true
    parser:
      max_num_instances: 200
      letter_box: false
      use_tie_breaker: true
      anchor_thresh: 0.4
  weight_decay: 0.000
  init_checkpoint: 'gs://tf_model_garden/vision/yolo/ckpt-15000'
  init_checkpoint_modules: 'backbone'
  annotation_file: null
trainer:
  train_steps: 555000
  validation_steps: 625
  steps_per_loop: 1850
  summary_interval: 1850
  validation_interval: 9250
  checkpoint_interval: 1850
  optimizer_config:
    ema:
      average_decay: 0.9998
      trainable_weights_only: false
      dynamic_decay: true
    learning_rate:
      type: stepwise
      stepwise:
        boundaries: [400000]
        name: PiecewiseConstantDecay
        values: [0.00131, 0.000131]
    optimizer:
      type: sgd_torch
      sgd_torch:
        momentum: 0.949
        momentum_start: 0.949
        nesterov: true
        warmup_steps: 1000
        weight_decay: 0.0005
        name: SGD
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 1000  # learning rate rises from 0 to 0.0013 over 1000 steps
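
For context only (not in the commit): the stepwise block above holds the learning rate at 0.00131 until step 400000 and then drops it to 0.000131. The same numbers expressed with the stock Keras schedule, shown purely to make the shape concrete:

import tensorflow as tf

# Same boundaries/values as the trainer.learning_rate block above.
schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[400000], values=[0.00131, 0.000131])

print(schedule(0).numpy())       # 0.00131  (before the boundary)
print(schedule(500000).numpy())  # 0.000131 (after the boundary)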
official/vision/beta/projects/yolo/configs/yolo.py (new file, 0 → 100755; view file @ 6a55ecde)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""YOLO configuration definition."""
import dataclasses
import os
from typing import Any, List, Optional, Union

import numpy as np

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.vision.beta.configs import common
from official.vision.beta.projects.yolo import optimization
from official.vision.beta.projects.yolo.configs import backbones
from official.vision.beta.projects.yolo.configs import decoders

# pytype: disable=annotation-type-mismatch

MIN_LEVEL = 1
MAX_LEVEL = 7
GLOBAL_SEED = 1000


def _build_dict(min_level, max_level, value):
  vals = {str(key): value for key in range(min_level, max_level + 1)}
  vals['all'] = None
  return lambda: vals


def _build_path_scales(min_level, max_level):
  return lambda: {str(key): 2**key for key in range(min_level, max_level + 1)}


@dataclasses.dataclass
class FPNConfig(hyperparams.Config):
  """FPN config."""
  all: Optional[Any] = None

  def get(self):
    """Allow for a key for each level or a single key for all the levels."""
    values = self.as_dict()
    if 'all' in values and values['all'] is not None:
      for key in values:
        if key != 'all':
          values[key] = values['all']
    return values


# pylint: disable=missing-class-docstring
@dataclasses.dataclass
class TfExampleDecoder(hyperparams.Config):
  regenerate_source_id: bool = False
  coco91_to_80: bool = True


@dataclasses.dataclass
class TfExampleDecoderLabelMap(hyperparams.Config):
  regenerate_source_id: bool = False
  label_map: str = ''


@dataclasses.dataclass
class DataDecoder(hyperparams.OneOfConfig):
  type: Optional[str] = 'simple_decoder'
  simple_decoder: TfExampleDecoder = TfExampleDecoder()
  label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap()


@dataclasses.dataclass
class Mosaic(hyperparams.Config):
  mosaic_frequency: float = 0.0
  mixup_frequency: float = 0.0
  mosaic_center: float = 0.2
  mosaic_crop_mode: Optional[str] = None
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  jitter: float = 0.0


@dataclasses.dataclass
class Parser(hyperparams.Config):
  max_num_instances: int = 200
  letter_box: Optional[bool] = True
  random_flip: bool = True
  random_pad: float = False
  jitter: float = 0.0
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  aug_rand_saturation: float = 0.0
  aug_rand_brightness: float = 0.0
  aug_rand_hue: float = 0.0
  aug_rand_angle: float = 0.0
  aug_rand_translate: float = 0.0
  aug_rand_perspective: float = 0.0
  use_tie_breaker: bool = True
  best_match_only: bool = False
  anchor_thresh: float = -0.01
  area_thresh: float = 0.1
  mosaic: Mosaic = Mosaic()


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  global_batch_size: int = 64
  input_path: str = ''
  tfds_name: str = ''
  tfds_split: str = ''
  global_batch_size: int = 1
  is_training: bool = True
  dtype: str = 'float16'
  decoder: DataDecoder = DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  tfds_download: bool = True
  cache: bool = False
  drop_remainder: bool = True


@dataclasses.dataclass
class YoloHead(hyperparams.Config):
  """Parameterization for the YOLO Head."""
  smart_bias: bool = True


@dataclasses.dataclass
class YoloDetectionGenerator(hyperparams.Config):
  box_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'original'))
  scale_xy: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  path_scales: FPNConfig = dataclasses.field(
      default_factory=_build_path_scales(MIN_LEVEL, MAX_LEVEL))
  nms_type: str = 'greedy'
  iou_thresh: float = 0.001
  nms_thresh: float = 0.6
  max_boxes: int = 200
  pre_nms_points: int = 5000


@dataclasses.dataclass
class YoloLoss(hyperparams.Config):
  ignore_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  truth_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  box_loss_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'ciou'))
  iou_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  cls_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  object_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  max_delta: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, np.inf))
  objectness_smooth: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  label_smoothing: float = 0.0
  use_scaled_loss: bool = True
  update_on_repeat: bool = True


@dataclasses.dataclass
class Box(hyperparams.Config):
  box: List[int] = dataclasses.field(default=list)


@dataclasses.dataclass
class AnchorBoxes(hyperparams.Config):
  boxes: Optional[List[Box]] = None
  level_limits: Optional[List[int]] = None
  anchors_per_scale: int = 3

  def get(self, min_level, max_level):
    """Distribute them in order to each level.

    Args:
      min_level: `int` the lowest output level.
      max_level: `int` the heighest output level.

    Returns:
      anchors_per_level: A `Dict[List[int]]` of the anchor boxes for each level.
      self.level_limits: A `List[int]` of the box size limits to link to each
        level under anchor free conditions.
    """
    if self.level_limits is None:
      boxes = [box.box for box in self.boxes]
    else:
      boxes = [[1.0, 1.0]] * ((max_level - min_level) + 1)
      self.anchors_per_scale = 1

    anchors_per_level = dict()
    start = 0
    for i in range(min_level, max_level + 1):
      anchors_per_level[str(i)] = boxes[start:start + self.anchors_per_scale]
      start += self.anchors_per_scale
    return anchors_per_level, self.level_limits


@dataclasses.dataclass
class Yolo(hyperparams.Config):
  input_size: Optional[List[int]] = dataclasses.field(
      default_factory=lambda: [512, 512, 3])
  backbone: backbones.Backbone = backbones.Backbone(
      type='darknet', darknet=backbones.Darknet(model_id='cspdarknet53'))
  decoder: decoders.Decoder = decoders.Decoder(
      type='yolo_decoder',
      yolo_decoder=decoders.YoloDecoder(version='v4', type='regular'))
  head: YoloHead = YoloHead()
  detection_generator: YoloDetectionGenerator = YoloDetectionGenerator()
  loss: YoloLoss = YoloLoss()
  norm_activation: common.NormActivation = common.NormActivation(
      activation='mish',
      use_sync_bn=True,
      norm_momentum=0.99,
      norm_epsilon=0.001)
  num_classes: int = 80
  anchor_boxes: AnchorBoxes = AnchorBoxes()
  darknet_based_model: bool = False


@dataclasses.dataclass
class YoloTask(cfg.TaskConfig):
  per_category_metrics: bool = False
  smart_bias_lr: float = 0.0
  model: Yolo = Yolo()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  weight_decay: float = 0.0
  annotation_file: Optional[str] = None
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[str, List[str]] = 'all'  # all, backbone, and/or decoder
  gradient_clip_norm: float = 0.0
  seed = GLOBAL_SEED


COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000


@exp_factory.register_config_factory('yolo')
def yolo() -> cfg.ExperimentConfig:
  """Yolo general config."""
  return cfg.ExperimentConfig(
      task=YoloTask(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])


@exp_factory.register_config_factory('yolo_darknet')
def yolo_darknet() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv3 and v4."""
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  validation_interval = 5
  max_num_instances = 200

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint='',
          init_checkpoint_modules='backbone',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              darknet_based_model=True,
              norm_activation=common.NormActivation(use_sync_bn=True),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=False, update_on_repeat=True),
              anchor_boxes=AnchorBoxes(
                  anchors_per_scale=3,
                  boxes=[
                      Box(box=[12, 16]),
                      Box(box=[19, 36]),
                      Box(box=[40, 28]),
                      Box(box=[36, 75]),
                      Box(box=[76, 55]),
                      Box(box=[72, 146]),
                      Box(box=[142, 110]),
                      Box(box=[192, 243]),
                      Box(box=[459, 401])
                  ])),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='float32',
              parser=Parser(
                  letter_box=False,
                  aug_rand_saturation=1.5,
                  aug_rand_brightness=1.5,
                  aug_rand_hue=0.1,
                  use_tie_breaker=True,
                  best_match_only=False,
                  anchor_thresh=0.4,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_frequency=0.75,
                      mixup_frequency=0.0,
                      mosaic_crop_mode='crop',
                      mosaic_center=0.2))),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=False,
                  use_tie_breaker=True,
                  best_match_only=False,
                  anchor_thresh=0.4,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9998,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.949,
                      'momentum_start': 0.949,
                      'nesterov': True,
                      'warmup_steps': 1000,
                      'weight_decay': 0.0005,
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [240 * steps_per_epoch],
                      'values': [
                          0.00131 * train_batch_size / 64.0,
                          0.000131 * train_batch_size / 64.0,
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 1000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('scaled_yolo')
def scaled_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4."""
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  warmup_epochs = 3
  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  max_num_instances = 300

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint_modules='',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              darknet_based_model=False,
              norm_activation=common.NormActivation(
                  activation='mish',
                  use_sync_bn=True,
                  norm_epsilon=0.0001,
                  norm_momentum=0.97),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=True),
              anchor_boxes=AnchorBoxes(
                  anchors_per_scale=3,
                  boxes=[
                      Box(box=[12, 16]),
                      Box(box=[19, 36]),
                      Box(box=[40, 28]),
                      Box(box=[36, 75]),
                      Box(box=[76, 55]),
                      Box(box=[72, 146]),
                      Box(box=[142, 110]),
                      Box(box=[192, 243]),
                      Box(box=[459, 401])
                  ])),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='float32',
              parser=Parser(
                  aug_rand_saturation=0.7,
                  aug_rand_brightness=0.4,
                  aug_rand_hue=0.015,
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  random_pad=False,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_crop_mode='scale',
                      mosaic_frequency=1.0,
                      mixup_frequency=0.0,
                  ))),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.937,
                      'momentum_start': 0.8,
                      'nesterov': True,
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'weight_decay': 0.0005 * train_batch_size / 64.0,
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.01,
                      'alpha': 0.2,
                      'decay_steps': train_epochs * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
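
An illustrative aside, not part of the commit: FPNConfig.get() is what lets the YAMLs above give either a single value under 'all' or a separate value per level (as the object_normalizer blocks do). A standalone sketch of that broadcast rule:

# Sketch of FPNConfig.get(): a non-None 'all' entry overrides per-level keys.
def broadcast_fpn_values(values):
  if values.get('all') is not None:
    for key in values:
      if key != 'all':
        values[key] = values['all']
  return values

per_level = broadcast_fpn_values({'3': 1.2, '4': 1.1, '5': 1.05, 'all': None})
print(per_level)  # per-level values kept as given

shared = broadcast_fpn_values({'3': 0.0, '4': 0.0, '5': 0.0, 'all': 2.0})
print(shared)     # every level replaced by 2.0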
official/vision/beta/projects/yolo/dataloaders/yolo_input.py (view file @ 6a55ecde)

@@ -75,11 +75,11 @@ class Parser(parser.Parser):
         saturation. saturation will be scaled between 1/value and value.
       aug_rand_brightness: `float` indicating the maximum scaling value for
         brightness. brightness will be scaled between 1/value and value.
-      letter_box: `boolean` indicating whether upon start of the datapipeline
+      letter_box: `boolean` indicating whether upon start of the data pipeline
         regardless of the preprocessing ops that are used, the aspect ratio of
         the images should be preserved.
       random_pad: `bool` indiccating wether to use padding to apply random
-        translation true for darknet yolo false for scaled yolo.
+        translation, true for darknet yolo false for scaled yolo.
       random_flip: `boolean` indicating whether or not to randomly flip the
         image horizontally.
       jitter: `float` for the maximum change in aspect ratio expected in each

@@ -147,6 +147,7 @@ class Parser(parser.Parser):
     # Set the per level values needed for operation
     self._darknet = darknet
     self._area_thresh = area_thresh
+    self._level_limits = level_limits

     self._seed = seed
     self._dtype = dtype

@@ -259,7 +260,7 @@ class Parser(parser.Parser):
         self._aug_rand_saturation,
         self._aug_rand_brightness,
         seed=self._seed,
-        darknet=self._darknet)
+        darknet=self._darknet or self._level_limits is not None)

     # Cast the image to the selcted datatype.
     image, labels = self._build_label(
official/vision/beta/projects/yolo/losses/yolo_loss.py (view file @ 6a55ecde)

@@ -40,7 +40,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
                loss_type='ciou',
                iou_normalizer=1.0,
                cls_normalizer=1.0,
-               obj_normalizer=1.0,
+               object_normalizer=1.0,
                label_smoothing=0.0,
                objectness_smooth=True,
                update_on_repeat=False,

@@ -65,7 +65,8 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
       iou_normalizer: `float` for how much to scale the loss on the IOU or the
         boxes.
       cls_normalizer: `float` for how much to scale the loss on the classes.
-      obj_normalizer: `float` for how much to scale loss on the detection map.
+      object_normalizer: `float` for how much to scale loss on the detection
+        map.
       label_smoothing: `float` for how much to smooth the loss on the classes.
       objectness_smooth: `float` for how much to smooth the loss on the
         detection map.

@@ -90,7 +91,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
     self._iou_normalizer = iou_normalizer
     self._cls_normalizer = cls_normalizer
-    self._obj_normalizer = obj_normalizer
+    self._object_normalizer = object_normalizer
     self._scale_x_y = scale_x_y
     self._max_delta = max_delta

@@ -240,9 +241,14 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
     Returns:
       loss: `tf.float` scalar for the scaled loss.
+      scale: `tf.float` how much the loss was scaled by.
     """
-    del box_loss, conf_loss, class_loss, ground_truths, predictions
-    return loss
+    del box_loss
+    del conf_loss
+    del class_loss
+    del ground_truths
+    del predictions
+    return loss, tf.ones_like(loss)

   @abc.abstractmethod
   def cross_replica_aggregation(self, loss, num_replicas_in_sync):

@@ -349,16 +355,16 @@ class DarknetLoss(YoloLossBase):
         tf.cast(true_class, tf.int32),
         depth=tf.shape(pred_class)[-1],
         dtype=pred_class.dtype)
-    true_classes = tf.stop_gradient(loss_utils.apply_mask(ind_mask, true_class))
+    true_class = tf.stop_gradient(loss_utils.apply_mask(ind_mask, true_class))

     # Reorganize the one hot class list as a grid.
-    true_class = loss_utils.build_grid(
-        inds, true_classes, pred_class, ind_mask, update=False)
-    true_class = tf.stop_gradient(true_class)
+    true_class_grid = loss_utils.build_grid(
+        inds, true_class, pred_class, ind_mask, update=False)
+    true_class_grid = tf.stop_gradient(true_class_grid)

     # Use the class mask to find the number of objects located in
     # each predicted grid cell/pixel.
-    counts = true_class
+    counts = true_class_grid
     counts = tf.reduce_sum(counts, axis=-1, keepdims=True)
     reps = tf.gather_nd(counts, inds, batch_dims=1)
     reps = tf.squeeze(reps, axis=-1)

@@ -372,19 +378,26 @@ class DarknetLoss(YoloLossBase):
     box_loss = math_ops.divide_no_nan(box_loss, reps)
     box_loss = tf.cast(tf.reduce_sum(box_loss, axis=1), dtype=y_pred.dtype)

+    if self._update_on_repeat:
+      # Converts list of gound truths into a grid where repeated values
+      # are replaced by the most recent value. So some class identities may
+      # get lost but the loss computation will be more stable. Results are
+      # more consistent.
       # Compute the sigmoid binary cross entropy for the class maps.
       class_loss = tf.reduce_mean(
           loss_utils.sigmoid_bce(
-              tf.expand_dims(true_class, axis=-1),
+              tf.expand_dims(true_class_grid, axis=-1),
               tf.expand_dims(pred_class, axis=-1), self._label_smoothing),
           axis=-1)

       # Apply normalization to the class losses.
       if self._cls_normalizer < 1.0:
         # Build a mask based on the true class locations.
-        cls_norm_mask = true_class
+        cls_norm_mask = true_class_grid
         # Apply the classes weight to class indexes were one_hot is one.
         class_loss *= ((1 - cls_norm_mask) + cls_norm_mask * self._cls_normalizer)

       # Mask to the class loss and compute the sum over all the objects.
       class_loss = tf.reduce_sum(class_loss, axis=-1)

@@ -392,6 +405,23 @@ class DarknetLoss(YoloLossBase):
       class_loss = math_ops.rm_nan_inf(class_loss, val=0.0)
       class_loss = tf.cast(
           tf.reduce_sum(class_loss, axis=(1, 2, 3)), dtype=y_pred.dtype)
+    else:
+      # Computes the loss while keeping the structure as a list in
+      # order to ensure all objects are considered. In some cases can
+      # make training more unstable but may also return higher APs.
+      pred_class = loss_utils.apply_mask(
+          ind_mask, tf.gather_nd(pred_class, inds, batch_dims=1))
+      class_loss = tf.keras.losses.binary_crossentropy(
+          tf.expand_dims(true_class, axis=-1),
+          tf.expand_dims(pred_class, axis=-1),
+          label_smoothing=self._label_smoothing,
+          from_logits=True)
+      class_loss = loss_utils.apply_mask(ind_mask, class_loss)
+      class_loss = math_ops.divide_no_nan(class_loss,
+                                          tf.expand_dims(reps, axis=-1))
+      class_loss = tf.cast(
+          tf.reduce_sum(class_loss, axis=(1, 2)), dtype=y_pred.dtype)
+      class_loss *= self._cls_normalizer

     # Compute the sigmoid binary cross entropy for the confidence maps.
     bce = tf.reduce_mean(

@@ -406,7 +436,7 @@ class DarknetLoss(YoloLossBase):
     # Apply the weights to each loss.
     box_loss *= self._iou_normalizer
-    conf_loss *= self._obj_normalizer
+    conf_loss *= self._object_normalizer

     # Add all the losses together then take the mean over the batches.
     loss = box_loss + class_loss + conf_loss

@@ -547,7 +577,7 @@ class ScaledLoss(YoloLossBase):
     # Apply the weights to each loss.
     box_loss *= self._iou_normalizer
     class_loss *= self._cls_normalizer
-    conf_loss *= self._obj_normalizer
+    conf_loss *= self._object_normalizer

     # Add all the losses together then take the sum over the batches.
     mean_loss = box_loss + class_loss + conf_loss

@@ -575,12 +605,13 @@ class ScaledLoss(YoloLossBase):
       predictions: `Dict` holding all the predicted values.

     Returns:
       loss: `tf.float` scalar for the scaled loss.
+      scale: `tf.float` how much the loss was scaled by.
     """
     scale = tf.stop_gradient(3 / len(list(predictions.keys())))
-    return loss * scale
+    return loss * scale, 1 / scale

   def cross_replica_aggregation(self, loss, num_replicas_in_sync):
-    """this method is not specific to each loss path, but each loss type."""
+    """This method is not specific to each loss path, but each loss type."""
     return loss

@@ -597,7 +628,7 @@ class YoloLoss:
                loss_types=None,
                iou_normalizers=None,
                cls_normalizers=None,
-               obj_normalizers=None,
+               object_normalizers=None,
                objectness_smooths=None,
                box_types=None,
                scale_xys=None,

@@ -627,8 +658,8 @@ class YoloLoss:
         or the boxes for each FPN path.
       cls_normalizers: `Dict[float]` for how much to scale the loss on the
         classes for each FPN path.
-      obj_normalizers: `Dict[float]` for how much to scale loss on the detection
-        map for each FPN path.
+      object_normalizers: `Dict[float]` for how much to scale loss on the
+        detection map for each FPN path.
       objectness_smooths: `Dict[float]` for how much to smooth the loss on the
         detection map for each FPN path.
       box_types: `Dict[bool]` for which scaling type to use for each FPN path.

@@ -666,7 +697,7 @@ class YoloLoss:
           loss_type=loss_types[key],
           iou_normalizer=iou_normalizers[key],
           cls_normalizer=cls_normalizers[key],
-          obj_normalizer=obj_normalizers[key],
+          object_normalizer=object_normalizers[key],
           box_type=box_types[key],
           objectness_smooth=objectness_smooths[key],
           max_delta=max_deltas[key],

@@ -695,10 +726,8 @@ class YoloLoss:
       # after computing the loss, scale loss as needed for aggregation
       # across FPN levels
-      loss = self._loss_dict[key].post_path_aggregation(
-          loss, loss_box, loss_conf, loss_class, ground_truth, predictions)
+      loss, scale = self._loss_dict[key].post_path_aggregation(
+          loss, loss_box, loss_conf, loss_class, ground_truth, predictions)

       # after completing the scaling of the loss on each replica, handle
       # scaling the loss for mergeing the loss across replicas

@@ -708,12 +737,13 @@ class YoloLoss:
       # detach all the below gradients: none of them should make a
       # contribution to the gradient form this point forwards
-      metric_loss += tf.stop_gradient(mean_loss)
-      metric_dict[key]['loss'] = tf.stop_gradient(mean_loss)
+      metric_loss += tf.stop_gradient(mean_loss / scale)
+      metric_dict[key]['loss'] = tf.stop_gradient(mean_loss / scale)
       metric_dict[key]['avg_iou'] = tf.stop_gradient(avg_iou)
       metric_dict[key]['avg_obj'] = tf.stop_gradient(avg_obj)
-      metric_dict['net']['box'] += tf.stop_gradient(loss_box)
-      metric_dict['net']['class'] += tf.stop_gradient(loss_class)
-      metric_dict['net']['conf'] += tf.stop_gradient(loss_conf)
+      metric_dict['net']['box'] += tf.stop_gradient(loss_box / scale)
+      metric_dict['net']['class'] += tf.stop_gradient(loss_class / scale)
+      metric_dict['net']['conf'] += tf.stop_gradient(loss_conf / scale)

     return loss_val, metric_loss, metric_dict
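
An aside, not part of the commit: the behavioral change in this file is that post_path_aggregation now returns a second value that the caller divides the reported per-level metrics by (tf.ones_like(loss) in the base class, so no change; 1 / scale for ScaledLoss, where scale is 3 divided by the number of FPN levels). A minimal sketch of that contract with plain numbers:

# Sketch of the ScaledLoss.post_path_aggregation contract after this change.
def post_path_aggregation(loss, num_fpn_levels):
  scale = 3 / num_fpn_levels   # 1.0 for the usual 3-level FPN, so a no-op there
  return loss * scale, 1 / scale

scaled_loss, metric_divisor = post_path_aggregation(loss=6.0, num_fpn_levels=5)
print(scaled_loss)             # 3.6
print(6.0 / metric_divisor)    # 3.6 as well: the metrics track the scaled loss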
official/vision/beta/projects/yolo/losses/yolo_loss_test.py (view file @ 6a55ecde)

@@ -60,7 +60,7 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
         loss_types={key: 'ciou' for key in keys},
         iou_normalizers={key: 0.05 for key in keys},
         cls_normalizers={key: 0.5 for key in keys},
-        obj_normalizers={key: 1.0 for key in keys},
+        object_normalizers={key: 1.0 for key in keys},
         objectness_smooths={key: 1.0 for key in keys},
         box_types={key: 'scaled' for key in keys},
         scale_xys={key: 2.0 for key in keys},
official/vision/beta/projects/yolo/modeling/backbones/darknet.py (view file @ 6a55ecde)

@@ -454,6 +454,9 @@ class Darknet(tf.keras.Model):

  def _build_struct(self, net, inputs):
    if self._use_reorg_input:
      inputs = nn_blocks.Reorg()(inputs)
      net[0].filters = net[1].filters
      net[0].output_name = net[1].output_name
      del net[1]

    endpoints = collections.OrderedDict()
    stack_outputs = [inputs]
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py (view file @ 6a55ecde)

@@ -13,10 +13,66 @@
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
from typing import Mapping, Union, Optional

import tensorflow as tf

from official.modeling import hyperparams
from official.vision.beta.modeling.decoders import factory
from official.vision.beta.projects.yolo.modeling.layers import nn_blocks

# model configurations
# the structure is as follows. model version, {v3, v4, v#, ... etc}
# the model config type {regular, tiny, small, large, ... etc}
YOLO_MODELS = {
    'v4':
        dict(
            regular=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                path_process_len=6),
            tiny=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
            csp=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                csp_stack=5,
                fpn_depth=5,
                path_process_len=6),
            csp_large=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                csp_stack=7,
                fpn_depth=7,
                path_process_len=8,
                fpn_filter_scale=2),
        ),
    'v3':
        dict(
            regular=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=None,
                path_process_len=6),
            tiny=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
            spp=dict(
                embed_spp=True,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
        ),
}


@tf.keras.utils.register_keras_serializable(package='yolo')
class _IdentityRoute(tf.keras.layers.Layer):

@@ -487,3 +543,66 @@ class YoloDecoder(tf.keras.Model):

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)


@factory.register_decoder_builder('yolo_decoder')
def build_yolo_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    **kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:
  """Builds Yolo FPN/PAN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.
    **kwargs: Additional kwargs arguments.

  Returns:
    A `tf.keras.Model` instance of the Yolo FPN/PAN decoder.
  """
  decoder_cfg = model_config.decoder.get()
  norm_activation_config = model_config.norm_activation

  activation = (
      decoder_cfg.activation if decoder_cfg.activation != 'same' else
      norm_activation_config.activation)

  if decoder_cfg.version is None:  # custom yolo
    raise ValueError('Decoder version cannot be None, specify v3 or v4.')

  if decoder_cfg.version not in YOLO_MODELS:
    raise ValueError(
        'Unsupported model version please select from {v3, v4}, '
        'or specify a custom decoder config using YoloDecoder in you yaml')

  if decoder_cfg.type is None:
    decoder_cfg.type = 'regular'

  if decoder_cfg.type not in YOLO_MODELS[decoder_cfg.version]:
    raise ValueError('Unsupported model type please select from '
                     '{yolo_model.YOLO_MODELS[decoder_cfg.version].keys()}'
                     'or specify a custom decoder config using YoloDecoder.')

  base_model = YOLO_MODELS[decoder_cfg.version][decoder_cfg.type]

  cfg_dict = decoder_cfg.as_dict()
  for key in base_model:
    if cfg_dict[key] is not None:
      base_model[key] = cfg_dict[key]

  base_dict = dict(
      activation=activation,
      use_spatial_attention=decoder_cfg.use_spatial_attention,
      use_separable_conv=decoder_cfg.use_separable_conv,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  base_model.update(base_dict)

  model = YoloDecoder(input_specs, **base_model, **kwargs)
  return model
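
For reference only (not in the commit): build_yolo_decoder starts from the YOLO_MODELS defaults for the requested version/type and overwrites only the fields the user set to a non-None value in the YoloDecoder config. The merge itself is just a dict update; a standalone sketch:

# Sketch of the default-merging step inside build_yolo_decoder.
yolo_models_defaults = {  # YOLO_MODELS['v4']['csp'] from above
    'embed_spp': False, 'use_fpn': True, 'max_level_process_len': None,
    'csp_stack': 5, 'fpn_depth': 5, 'path_process_len': 6}

user_overrides = {'fpn_depth': 7, 'csp_stack': None}  # None means "keep default"

base_model = dict(yolo_models_defaults)
for key in base_model:
  if user_overrides.get(key) is not None:
    base_model[key] = user_overrides[key]

print(base_model['fpn_depth'], base_model['csp_stack'])  # 7 5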
official/vision/beta/projects/yolo/modeling/factory.py (new file, 0 → 100644; view file @ 6a55ecde)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains common factory functions yolo neural networks."""

from absl import logging

from official.vision.beta.modeling.backbones import factory as backbone_factory
from official.vision.beta.modeling.decoders import factory as decoder_factory
from official.vision.beta.projects.yolo.configs import yolo
from official.vision.beta.projects.yolo.modeling import yolo_model
from official.vision.beta.projects.yolo.modeling.heads import yolo_head
from official.vision.beta.projects.yolo.modeling.layers import detection_generator


def build_yolo_detection_generator(model_config: yolo.Yolo, anchor_boxes):
  """Builds yolo detection generator."""
  model = detection_generator.YoloLayer(
      classes=model_config.num_classes,
      anchors=anchor_boxes,
      iou_thresh=model_config.detection_generator.iou_thresh,
      nms_thresh=model_config.detection_generator.nms_thresh,
      max_boxes=model_config.detection_generator.max_boxes,
      pre_nms_points=model_config.detection_generator.pre_nms_points,
      nms_type=model_config.detection_generator.nms_type,
      box_type=model_config.detection_generator.box_type.get(),
      path_scale=model_config.detection_generator.path_scales.get(),
      scale_xy=model_config.detection_generator.scale_xy.get(),
      label_smoothing=model_config.loss.label_smoothing,
      use_scaled_loss=model_config.loss.use_scaled_loss,
      update_on_repeat=model_config.loss.update_on_repeat,
      truth_thresh=model_config.loss.truth_thresh.get(),
      loss_type=model_config.loss.box_loss_type.get(),
      max_delta=model_config.loss.max_delta.get(),
      iou_normalizer=model_config.loss.iou_normalizer.get(),
      cls_normalizer=model_config.loss.cls_normalizer.get(),
      object_normalizer=model_config.loss.object_normalizer.get(),
      ignore_thresh=model_config.loss.ignore_thresh.get(),
      objectness_smooth=model_config.loss.objectness_smooth.get())
  return model


def build_yolo_head(input_specs, model_config: yolo.Yolo, l2_regularization):
  """Builds yolo head."""
  min_level = min(map(int, input_specs.keys()))
  max_level = max(map(int, input_specs.keys()))
  head = yolo_head.YoloHead(
      min_level=min_level,
      max_level=max_level,
      classes=model_config.num_classes,
      boxes_per_level=model_config.anchor_boxes.anchors_per_scale,
      norm_momentum=model_config.norm_activation.norm_momentum,
      norm_epsilon=model_config.norm_activation.norm_epsilon,
      kernel_regularizer=l2_regularization,
      smart_bias=model_config.head.smart_bias)
  return head


def build_yolo(input_specs, model_config, l2_regularization):
  """Builds yolo model."""
  backbone = model_config.backbone.get()
  anchor_dict, _ = model_config.anchor_boxes.get(backbone.min_level,
                                                 backbone.max_level)
  backbone = backbone_factory.build_backbone(input_specs, model_config.backbone,
                                             model_config.norm_activation,
                                             l2_regularization)
  decoder = decoder_factory.build_decoder(backbone.output_specs, model_config,
                                          l2_regularization)
  head = build_yolo_head(decoder.output_specs, model_config, l2_regularization)
  detection_generator_obj = build_yolo_detection_generator(model_config,
                                                           anchor_dict)

  model = yolo_model.Yolo(
      backbone=backbone,
      decoder=decoder,
      head=head,
      detection_generator=detection_generator_obj)
  model.build(input_specs.shape)

  model.summary(print_fn=logging.info)

  losses = detection_generator_obj.get_losses()
  return model, losses
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py (view file @ 6a55ecde)

@@ -36,7 +36,7 @@ class YoloLayer(tf.keras.Model):
                loss_type='ciou',
                iou_normalizer=1.0,
                cls_normalizer=1.0,
-               obj_normalizer=1.0,
+               object_normalizer=1.0,
                use_scaled_loss=False,
                update_on_repeat=False,
                pre_nms_points=5000,

@@ -67,7 +67,8 @@ class YoloLayer(tf.keras.Model):
       iou_normalizer: `float` for how much to scale the loss on the IOU or the
         boxes.
       cls_normalizer: `float` for how much to scale the loss on the classes.
-      obj_normalizer: `float` for how much to scale loss on the detection map.
+      object_normalizer: `float` for how much to scale loss on the detection
+        map.
       use_scaled_loss: `bool` for whether to use the scaled loss
         or the traditional loss.
       update_on_repeat: `bool` indicating how you would like to handle repeated

@@ -110,7 +111,7 @@ class YoloLayer(tf.keras.Model):
     self._truth_thresh = truth_thresh
     self._iou_normalizer = iou_normalizer
     self._cls_normalizer = cls_normalizer
-    self._obj_normalizer = obj_normalizer
+    self._object_normalizer = object_normalizer
     self._objectness_smooth = objectness_smooth
     self._nms_thresh = nms_thresh
     self._max_boxes = max_boxes

@@ -289,7 +290,7 @@ class YoloLayer(tf.keras.Model):
         loss_types=self._loss_type,
         iou_normalizers=self._iou_normalizer,
         cls_normalizers=self._cls_normalizer,
-        obj_normalizers=self._obj_normalizer,
+        object_normalizers=self._object_normalizer,
         objectness_smooths=self._objectness_smooth,
         box_types=self._box_type,
         max_deltas=self._max_delta,
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py (view file @ 6a55ecde)

@@ -14,7 +14,9 @@
"""Contains common building blocks for yolo neural networks."""
from typing import Callable, List, Tuple

import tensorflow as tf

from official.modeling import tf_utils
from official.vision.beta.ops import spatial_transform_ops

@@ -141,6 +143,7 @@ class ConvBN(tf.keras.layers.Layer):
     # activation params
     self._activation = activation
     self._leaky_alpha = leaky_alpha
+    self._fuse = False

     super().__init__(**kwargs)

@@ -164,6 +167,8 @@ class ConvBN(tf.keras.layers.Layer):
           momentum=self._norm_momentum,
           epsilon=self._norm_epsilon,
           axis=self._bn_axis)
+    else:
+      self.bn = None

     if self._activation == 'leaky':
       self._activation_fn = tf.keras.layers.LeakyReLU(alpha=self._leaky_alpha)

@@ -174,11 +179,44 @@ class ConvBN(tf.keras.layers.Layer):
  def call(self, x):
    x = self.conv(x)
-   if self._use_bn:
+   if self._use_bn and not self._fuse:
      x = self.bn(x)
    x = self._activation_fn(x)
    return x

+  def fuse(self):
+    if self.bn is not None and not self._use_separable_conv:
+      # Fuse convolution and batchnorm, gives me +2 to 3 FPS 2ms latency.
+      # layers: https://tehnokv.com/posts/fusing-batchnorm-and-conv/
+      if self._fuse:
+        return
+
+      self._fuse = True
+      conv_weights = self.conv.get_weights()[0]
+      gamma, beta, moving_mean, moving_variance = self.bn.get_weights()
+
+      self.conv.use_bias = True
+      infilters = conv_weights.shape[-2]
+      self.conv.build([None, None, None, infilters])
+
+      base = tf.sqrt(self._norm_epsilon + moving_variance)
+      w_conv_base = tf.transpose(conv_weights, perm=(3, 2, 0, 1))
+      w_conv = tf.reshape(w_conv_base, [conv_weights.shape[-1], -1])
+
+      w_bn = tf.linalg.diag(gamma / base)
+      w_conv = tf.reshape(tf.matmul(w_bn, w_conv), w_conv_base.get_shape())
+      w_conv = tf.transpose(w_conv, perm=(2, 3, 1, 0))
+
+      b_bn = beta - gamma * moving_mean / base
+
+      self.conv.set_weights([w_conv, b_bn])
+
+      del self.bn
+
+      self.trainable = False
+      self.conv.trainable = False
+      self.bn = None
+    return

  def get_config(self):
    # used to store/share parameters to reconstruct the model
    layer_config = {
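
An aside, not part of the commit: ConvBN.fuse() applies the standard conv/batch-norm folding, W' = diag(gamma / sqrt(var + eps)) . W and b' = beta - gamma * mean / sqrt(var + eps). A small NumPy check of that identity on a toy channel-wise linear op (illustrative only; the layer above handles the kernel reshaping for real 2-D convolutions):

import numpy as np

# Folding batch norm into a preceding (bias-free) linear op:
# y = gamma * (W x - mean) / sqrt(var + eps) + beta  ==  W' x + b'
rng = np.random.default_rng(0)
w = rng.normal(size=(4, 3))              # 4 output channels, 3 input channels
gamma, beta = rng.normal(size=4), rng.normal(size=4)
mean, var, eps = rng.normal(size=4), rng.uniform(0.5, 2.0, size=4), 1e-4

base = np.sqrt(var + eps)
w_fused = (gamma / base)[:, None] * w    # same as diag(gamma / base) @ w
b_fused = beta - gamma * mean / base

x = rng.normal(size=3)
bn_out = gamma * (w @ x - mean) / base + beta
fused_out = w_fused @ x + b_fused
print(np.allclose(bn_out, fused_out))    # True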
official/vision/beta/projects/yolo/modeling/yolo_model.py (view file @ 6a55ecde)

@@ -14,72 +14,19 @@
 """Yolo models."""
 from typing import Mapping, Union

 import tensorflow as tf

-# static base Yolo Models that do not require configuration
-# similar to a backbone model id.
-# this is done greatly simplify the model config
-# the structure is as follows. model version, {v3, v4, v#, ... etc}
-# the model config type {regular, tiny, small, large, ... etc}
-YOLO_MODELS = {
-    "v4":
-        dict(
-            regular=dict(
-                embed_spp=False,
-                use_fpn=True,
-                max_level_process_len=None,
-                path_process_len=6),
-            tiny=dict(
-                embed_spp=False,
-                use_fpn=False,
-                max_level_process_len=2,
-                path_process_len=1),
-            csp=dict(
-                embed_spp=False,
-                use_fpn=True,
-                max_level_process_len=None,
-                csp_stack=5,
-                fpn_depth=5,
-                path_process_len=6),
-            csp_large=dict(
-                embed_spp=False,
-                use_fpn=True,
-                max_level_process_len=None,
-                csp_stack=7,
-                fpn_depth=7,
-                path_process_len=8,
-                fpn_filter_scale=2),
-        ),
-    "v3":
-        dict(
-            regular=dict(
-                embed_spp=False,
-                use_fpn=False,
-                max_level_process_len=None,
-                path_process_len=6),
-            tiny=dict(
-                embed_spp=False,
-                use_fpn=False,
-                max_level_process_len=2,
-                path_process_len=1),
-            spp=dict(
-                embed_spp=True,
-                use_fpn=False,
-                max_level_process_len=2,
-                path_process_len=1),
-        ),
-}
+from official.vision.beta.projects.yolo.modeling.layers import nn_blocks


 class Yolo(tf.keras.Model):
   """The YOLO model class."""

   def __init__(self,
-               backbone=None,
-               decoder=None,
-               head=None,
-               detection_generator=None,
+               backbone,
+               decoder,
+               head,
+               detection_generator,
                **kwargs):
     """Detection initialization function.

@@ -93,10 +40,10 @@ class Yolo(tf.keras.Model):
     super(Yolo, self).__init__(**kwargs)
     self._config_dict = {
-        "backbone": backbone,
-        "decoder": decoder,
-        "head": head,
-        "filter": detection_generator
+        'backbone': backbone,
+        'decoder': decoder,
+        'head': head,
+        'detection_generator': detection_generator
     }

     # model components

@@ -104,18 +51,19 @@ class Yolo(tf.keras.Model):
     self._decoder = decoder
     self._head = head
     self._detection_generator = detection_generator
+    self._fused = False
     return

   def call(self, inputs, training=False):
-    maps = self._backbone(inputs)
-    decoded_maps = self._decoder(maps)
-    raw_predictions = self._head(decoded_maps)
+    maps = self.backbone(inputs)
+    decoded_maps = self.decoder(maps)
+    raw_predictions = self.head(decoded_maps)
     if training:
-      return {"raw_output": raw_predictions}
+      return {'raw_output': raw_predictions}
     else:
       # Post-processing.
-      predictions = self._detection_generator(raw_predictions)
-      predictions.update({"raw_output": raw_predictions})
+      predictions = self.detection_generator(raw_predictions)
+      predictions.update({'raw_output': raw_predictions})
       return predictions

   @property

@@ -141,28 +89,22 @@ class Yolo(tf.keras.Model):
   def from_config(cls, config):
     return cls(**config)

-  def get_weight_groups(self, train_vars):
-    """Sort the list of trainable variables into groups for optimization.
-
-    Args:
-      train_vars: a list of tf.Variables that need to get sorted into their
-        respective groups.
-
-    Returns:
-      weights: a list of tf.Variables for the weights.
-      bias: a list of tf.Variables for the bias.
-      other: a list of tf.Variables for the other operations.
-    """
-    bias = []
-    weights = []
-    other = []
-    for var in train_vars:
-      if "bias" in var.name:
-        bias.append(var)
-      elif "beta" in var.name:
-        bias.append(var)
-      elif "kernel" in var.name or "weight" in var.name:
-        weights.append(var)
-      else:
-        other.append(var)
-    return weights, bias, other
-
+  @property
+  def checkpoint_items(
+      self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
+    """Returns a dictionary of items to be additionally checkpointed."""
+    items = dict(backbone=self.backbone, head=self.head)
+    if self.decoder is not None:
+      items.update(decoder=self.decoder)
+    return items
+
+  def fuse(self):
+    """Fuses all Convolution and Batchnorm layers to get better latency."""
+    print('Fusing Conv Batch Norm Layers.')
+    if not self._fused:
+      self._fused = True
+      for layer in self.submodules:
+        if isinstance(layer, nn_blocks.ConvBN):
+          layer.fuse()
+    self.summary()
+    return
official/vision/beta/projects/yolo/ops/mosaic.py (view file @ 6a55ecde)

@@ -14,7 +14,6 @@
"""Mosaic op."""
import random

import tensorflow as tf
import tensorflow_addons as tfa

@@ -55,7 +54,7 @@ class Mosaic:
         the images should be preserved.
       jitter: `float` for the maximum change in aspect ratio expected in each
         preprocessing step.
-      mosaic_crop_mode: `str` they type of mosaic to apply. The options are
+      mosaic_crop_mode: `str` the type of mosaic to apply. The options are
         {crop, scale, None}, crop will construct a mosaic by slicing images
         togther, scale will create a mosaic by concatnating and shifting the
         image, and None will default to scale and apply no post processing to

@@ -325,6 +324,12 @@ class Mosaic:
     else:
       return self._add_param(noop)

+  def _beta(self, alpha, beta):
+    """Generates a random number using the beta distribution."""
+    a = tf.random.gamma([], alpha)
+    b = tf.random.gamma([], beta)
+    return b / (a + b)
+
   def _mixup(self, one, two):
     """Blend together 2 images for the mixup data augmentation."""
     if self._mixup_frequency >= 1.0:

@@ -337,8 +342,8 @@ class Mosaic:
     if domo >= (1 - self._mixup_frequency):
       sample = one
       otype = one['image'].dtype
-      r = preprocessing_ops.random_uniform_strong(
-          0.4, 0.6, tf.float32, seed=self._seed)
+      r = self._beta(8.0, 8.0)
       sample['image'] = (
           r * tf.cast(one['image'], tf.float32) +
          (1 - r) * tf.cast(two['image'], tf.float32))
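
A closing aside, not part of the commit: the new _beta helper builds a Beta-distributed mixing ratio from two Gamma draws (b / (a + b)); with alpha = beta = 8.0 the ratio is symmetric and concentrated around 0.5, which replaces the earlier uniform(0.4, 0.6) draw for mixup. A standalone sketch of the same construction:

import tensorflow as tf

# Sketch of Mosaic._beta: a Beta-distributed sample built from two Gamma draws.
def beta_sample(alpha, beta):
  a = tf.random.gamma([], alpha)
  b = tf.random.gamma([], beta)
  return b / (a + b)

# With alpha == beta == 8.0 the mixing ratio stays close to an even
# blend of the two images being mixed up.
r = beta_sample(8.0, 8.0)
print(float(r))  # e.g. 0.47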