ModelZoo / ResNet50_tensorflow · Commits

Commit 2b676a9b, authored Jun 16, 2021 by Gunho Park
Merge remote-tracking branch 'upstream/master'
Parents: 6ddd627a, bcbce005

Changes: 28 files in the commit; 20 changed files shown below, with 1269 additions and 275 deletions (+1269, -275).
Changed files shown on this page:

  official/core/base_trainer.py                                          +3    -2
  official/nlp/data/classifier_data_lib.py                               +16   -18
  official/nlp/data/sentence_prediction_dataloader.py                    +7    -5
  official/nlp/data/sentence_prediction_dataloader_test.py               +9    -6
  official/nlp/projects/mobilebert/README.md                             +1    -1
  official/nlp/tasks/sentence_prediction.py                              +10   -6
  official/projects/README.md                                            +2    -0
  official/vision/beta/data/create_coco_tf_record.py                     +27   -10
  official/vision/beta/data/process_coco_few_shot.sh                     +48   -0
  official/vision/beta/data/process_coco_few_shot_json_files.py          +124  -0
  official/vision/beta/projects/movinet/modeling/movinet.py              +11   -2
  official/vision/beta/projects/movinet/modeling/movinet_layers.py       +21   -2
  official/vision/beta/projects/movinet/modeling/movinet_model.py        +22   -6
  official/vision/beta/projects/yolo/README.md                           +5    -0
  official/vision/beta/projects/yolo/configs/backbones.py                +7    -4
  official/vision/beta/projects/yolo/configs/darknet_classification.py   +1    -1
  official/vision/beta/projects/yolo/modeling/backbones/darknet.py       +422  -184
  official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py  +41   -28
  official/vision/beta/projects/yolo/modeling/decoders/__init__.py       +14   -0
  official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py   +478  -0
official/core/base_trainer.py (+3, -2)

@@ -246,10 +246,11 @@ class Trainer(_AsyncTrainer):
     self._train_loss = tf.keras.metrics.Mean("training_loss", dtype=tf.float32)
     self._validation_loss = tf.keras.metrics.Mean(
         "validation_loss", dtype=tf.float32)
+    model_metrics = model.metrics if hasattr(model, "metrics") else []
-    self._train_metrics = self.task.build_metrics(
-        training=True) + self.model.metrics
-    self._validation_metrics = self.task.build_metrics(
-        training=False) + self.model.metrics
+    self._train_metrics = self.task.build_metrics(training=True) + model_metrics
+    self._validation_metrics = self.task.build_metrics(
+        training=False) + model_metrics
     self.init_async()
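The guarded `model_metrics` lookup matters because not every model object handed to the trainer exposes a `metrics` attribute. A minimal sketch of the same pattern, using placeholder objects (`DummyModel` and the standalone `build_metrics` helper are illustrative, not part of the Model Garden API):

import tensorflow as tf

class DummyModel:
  """Stand-in for a model object that has no `metrics` attribute."""
  pass

def build_metrics(training=True):
  # Stand-in for task.build_metrics().
  name = "training" if training else "validation"
  return [tf.keras.metrics.SparseCategoricalAccuracy(name=f"{name}_accuracy")]

model = DummyModel()
# Same pattern as the patched trainer: fall back to an empty list instead of
# raising AttributeError when the model exposes no metrics.
model_metrics = model.metrics if hasattr(model, "metrics") else []
train_metrics = build_metrics(training=True) + model_metrics
print([m.name for m in train_metrics])  # ['training_accuracy']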
official/nlp/data/classifier_data_lib.py (+16, -18)

@@ -181,20 +181,21 @@ class AxProcessor(DataProcessor):
 class ColaProcessor(DataProcessor):
   """Processor for the CoLA data set (GLUE version)."""

+  def __init__(self, process_text_fn=tokenization.convert_to_unicode):
+    super(ColaProcessor, self).__init__(process_text_fn)
+    self.dataset = tfds.load("glue/cola", try_gcs=True)
+
   def get_train_examples(self, data_dir):
     """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+    return self._create_examples_tfds("train")

   def get_dev_examples(self, data_dir):
     """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
+    return self._create_examples_tfds("validation")

   def get_test_examples(self, data_dir):
     """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
+    return self._create_examples_tfds("test")

   def get_labels(self):
     """See base class."""
...
@@ -205,22 +206,19 @@ class ColaProcessor(DataProcessor):
     """See base class."""
     return "COLA"

-  def _create_examples(self, lines, set_type):
+  def _create_examples_tfds(self, set_type):
     """Creates examples for the training/dev/test sets."""
+    dataset = self.dataset[set_type].as_numpy_iterator()
     examples = []
-    for i, line in enumerate(lines):
-      # Only the test set has a header.
-      if set_type == "test" and i == 0:
-        continue
+    for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
-      if set_type == "test":
-        text_a = self.process_text_fn(line[1])
-        label = "0"
-      else:
-        text_a = self.process_text_fn(line[3])
-        label = self.process_text_fn(line[1])
+      label = "0"
+      text_a = self.process_text_fn(example["sentence"])
+      if set_type != "test":
+        label = str(example["label"])
       examples.append(
-          InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
+          InputExample(guid=guid, text_a=text_a, text_b=None, label=label,
+                       weight=None))
     return examples
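The rewritten processor now reads CoLA straight from TFDS instead of TSV files. A rough usage sketch of the same iteration, assuming `tensorflow_datasets` is installed and leaving out the module's tokenization helpers:

import tensorflow_datasets as tfds

# Same idea as ColaProcessor._create_examples_tfds: iterate a TFDS split and
# build (guid, text, label) tuples. "validation" is the TFDS name for the dev set.
dataset = tfds.load("glue/cola", try_gcs=True)["validation"].as_numpy_iterator()
examples = []
for i, example in enumerate(dataset):
  guid = "validation-%d" % i
  text_a = example["sentence"].decode("utf-8")  # numpy iterator yields bytes
  label = str(example["label"])
  examples.append((guid, text_a, label))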
official/nlp/data/sentence_prediction_dataloader.py (+7, -5)

@@ -40,6 +40,7 @@ class SentencePredictionDataConfig(cfg.DataConfig):
   label_type: str = 'int'
   # Whether to include the example id number.
   include_example_id: bool = False
+  label_field: str = 'label_ids'
   # Maps the key in TfExample to feature name.
   # E.g 'label_ids' to 'next_sentence_labels'
   label_name: Optional[Tuple[str, str]] = None
...
@@ -53,6 +54,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
     self._params = params
     self._seq_length = params.seq_length
     self._include_example_id = params.include_example_id
+    self._label_field = params.label_field
     if params.label_name:
       self._label_name_mapping = dict([params.label_name])
     else:
...
@@ -65,7 +67,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
         'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
         'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64),
         'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'label_ids': tf.io.FixedLenFeature([], label_type),
+        self._label_field: tf.io.FixedLenFeature([], label_type),
     }
     if self._include_example_id:
       name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64)
...
@@ -92,10 +94,10 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
     if self._include_example_id:
       x['example_id'] = record['example_id']
-    x['label_ids'] = record['label_ids']
+    x[self._label_field] = record[self._label_field]
-    if 'label_ids' in self._label_name_mapping:
-      x[self._label_name_mapping['label_ids']] = record['label_ids']
+    if self._label_field in self._label_name_mapping:
+      x[self._label_name_mapping[self._label_field]] = record[self._label_field]
     return x
...
@@ -215,7 +217,7 @@ class SentencePredictionTextDataLoader(data_loader.DataLoader):
     model_inputs = self._text_processor(segments)
     if self._include_example_id:
       model_inputs['example_id'] = record['example_id']
-    model_inputs['label_ids'] = record[self._label_field]
+    model_inputs[self._label_field] = record[self._label_field]
     return model_inputs

   def _decode(self, record: tf.Tensor):
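The new `label_field` plus the existing `label_name` pair let one dataset key feed a differently named model input. A small illustration of that mapping logic, written as standalone dict manipulation rather than the actual loader class:

# Assume a decoded TfExample record and a config equivalent to
# label_field='label_ids', label_name=('label_ids', 'next_sentence_labels').
record = {'input_ids': [1, 2, 3], 'label_ids': 1}
label_field = 'label_ids'
label_name_mapping = dict([('label_ids', 'next_sentence_labels')])

x = {'input_word_ids': record['input_ids']}
x[label_field] = record[label_field]
if label_field in label_name_mapping:
  # Expose the label under the name the model expects.
  x[label_name_mapping[label_field]] = record[label_field]
print(x)
# {'input_word_ids': [1, 2, 3], 'label_ids': 1, 'next_sentence_labels': 1}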
official/nlp/data/sentence_prediction_dataloader_test.py (+9, -6)

@@ -197,13 +197,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
         vocab_file=vocab_file_path)
     dataset = loader.SentencePredictionTextDataLoader(data_config).load()
     features = next(iter(dataset))
+    label_field = data_config.label_field
     self.assertCountEqual(
-        ['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'],
+        ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
         features.keys())
     self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
-    self.assertEqual(features['label_ids'].shape, (batch_size,))
+    self.assertEqual(features[label_field].shape, (batch_size,))

   @parameterized.parameters(True, False)
   def test_python_sentencepiece_preprocessing(self, use_tfds):
...
@@ -231,13 +232,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
     )
     dataset = loader.SentencePredictionTextDataLoader(data_config).load()
     features = next(iter(dataset))
+    label_field = data_config.label_field
     self.assertCountEqual(
-        ['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'],
+        ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
         features.keys())
     self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
-    self.assertEqual(features['label_ids'].shape, (batch_size,))
+    self.assertEqual(features[label_field].shape, (batch_size,))

   @parameterized.parameters(True, False)
   def test_saved_model_preprocessing(self, use_tfds):
...
@@ -265,13 +267,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
     )
     dataset = loader.SentencePredictionTextDataLoader(data_config).load()
     features = next(iter(dataset))
+    label_field = data_config.label_field
     self.assertCountEqual(
-        ['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'],
+        ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
         features.keys())
     self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
-    self.assertEqual(features['label_ids'].shape, (batch_size,))
+    self.assertEqual(features[label_field].shape, (batch_size,))

 if __name__ == '__main__':
...
official/nlp/projects/mobilebert/README.md (+1, -1)

@@ -22,7 +22,7 @@ modeling library:
 *   [mobile_bert_encoder.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/mobile_bert_encoder.py)
     contains `MobileBERTEncoder` implementation.
 *   [mobile_bert_layers.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/mobile_bert_layers.py)
-    contains `MobileBertEmbedding`, `MobileBertMaskedLM` and `MobileBertMaskedLM`
+    contains `MobileBertEmbedding`, `MobileBertTransformer` and `MobileBertMaskedLM`
     implementation.

 ## Pre-trained Models
official/nlp/tasks/sentence_prediction.py (+10, -6)

@@ -69,6 +69,10 @@ class SentencePredictionTask(base_task.Task):
     if params.metric_type not in METRIC_TYPES:
       raise ValueError('Invalid metric_type: {}'.format(params.metric_type))
     self.metric_type = params.metric_type
+    if hasattr(params.train_data, 'label_field'):
+      self.label_field = params.train_data.label_field
+    else:
+      self.label_field = 'label_ids'

   def build_model(self):
     if self.task_config.hub_module_url and self.task_config.init_checkpoint:
...
@@ -95,7 +99,7 @@ class SentencePredictionTask(base_task.Task):
         use_encoder_pooler=self.task_config.model.use_encoder_pooler)

   def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
-    label_ids = labels['label_ids']
+    label_ids = labels[self.label_field]
     if self.task_config.model.num_classes == 1:
       loss = tf.keras.losses.mean_squared_error(label_ids, model_outputs)
     else:
...
@@ -121,7 +125,7 @@ class SentencePredictionTask(base_task.Task):
         y = tf.zeros((1,), dtype=tf.float32)
       else:
         y = tf.zeros((1, 1), dtype=tf.int32)
-      x['label_ids'] = y
+      x[self.label_field] = y
       return x

     dataset = tf.data.Dataset.range(1)
...
@@ -144,10 +148,10 @@ class SentencePredictionTask(base_task.Task):
   def process_metrics(self, metrics, labels, model_outputs):
     for metric in metrics:
-      metric.update_state(labels['label_ids'], model_outputs)
+      metric.update_state(labels[self.label_field], model_outputs)

   def process_compiled_metrics(self, compiled_metrics, labels, model_outputs):
-    compiled_metrics.update_state(labels, model_outputs)
+    compiled_metrics.update_state(labels[self.label_field], model_outputs)

   def validation_step(self, inputs, model: tf.keras.Model, metrics=None):
     if self.metric_type == 'accuracy':
...
@@ -163,12 +167,12 @@ class SentencePredictionTask(base_task.Task):
           'sentence_prediction':  # Ensure one prediction along batch dimension.
               tf.expand_dims(tf.math.argmax(outputs, axis=1), axis=1),
-          'labels': labels['label_ids'],
+          'labels': labels[self.label_field],
       })
     if self.metric_type == 'pearson_spearman_corr':
       logs.update({
           'sentence_prediction': outputs,
-          'labels': labels['label_ids'],
+          'labels': labels[self.label_field],
       })
     return logs
...
official/projects/README.md (new file, +2, -0)

+This directory contains projects using TensorFlow Model Garden Modeling
+libraries.
official/vision/beta/data/create_coco_tf_record.py (+27, -10)

@@ -46,7 +46,7 @@ from official.vision.beta.data import tfrecord_lib
 flags.DEFINE_boolean(
     'include_masks', False, 'Whether to include instance segmentations masks '
     '(PNG encoded) in the result. default: False.')
-flags.DEFINE_string('image_dir', '', 'Directory containing images.')
+flags.DEFINE_multi_string('image_dir', '', 'Directory containing images.')
 flags.DEFINE_string(
     'image_info_file', '', 'File containing image information. '
     'Tf Examples in the output files correspond to the image '
...
@@ -159,7 +159,7 @@ def encode_caption_annotations(caption_annotations):
 def create_tf_example(image,
-                      image_dir,
+                      image_dirs,
                       bbox_annotations=None,
                       id_to_name_map=None,
                       caption_annotations=None,
...
@@ -169,7 +169,7 @@ def create_tf_example(image,
   Args:
     image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
       u'width', u'date_captured', u'flickr_url', u'id']
-    image_dir: directory containing the image files.
+    image_dirs: list of directories containing the image files.
     bbox_annotations:
       list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
       u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
...
@@ -190,14 +190,31 @@ def create_tf_example(image,
     num_annotations_skipped: Number of (invalid) annotations that were ignored.

   Raises:
-    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+    ValueError: if the image pointed to by data['filename'] is not a valid JPEG,
+      does not exist, or is not unique across image directories.
   """
   image_height = image['height']
   image_width = image['width']
   filename = image['file_name']
   image_id = image['id']

-  full_path = os.path.join(image_dir, filename)
+  if len(image_dirs) > 1:
+    full_paths = [os.path.join(image_dir, filename) for image_dir in image_dirs]
+    full_existing_paths = [p for p in full_paths if tf.io.gfile.exists(p)]
+    if not full_existing_paths:
+      raise ValueError(
+          '{} does not exist across image directories.'.format(filename))
+    if len(full_existing_paths) > 1:
+      raise ValueError(
+          '{} is not unique across image directories'.format(filename))
+    full_path, = full_existing_paths
+  # If there is only one image directory, it's not worth checking for existence,
+  # since trying to open the file will raise an informative error message if it
+  # does not exist.
+  else:
+    image_dir, = image_dirs
+    full_path = os.path.join(image_dir, filename)
   with tf.io.gfile.GFile(full_path, 'rb') as fid:
     encoded_jpg = fid.read()
...
@@ -276,7 +293,7 @@ def _load_images_info(images_info_file):
   return info_dict['images']

-def generate_annotations(images, image_dir,
+def generate_annotations(images, image_dirs,
                          img_to_obj_annotation=None,
                          img_to_caption_annotation=None,
                          id_to_name_map=None,
                          include_masks=False):
...
@@ -289,12 +306,12 @@ def generate_annotations(images, image_dir,
     caption_annotaion = (img_to_caption_annotation.get(image['id'], None)
                          if img_to_caption_annotation else None)

-    yield (image, image_dir, object_annotation, id_to_name_map,
+    yield (image, image_dirs, object_annotation, id_to_name_map,
            caption_annotaion, include_masks)


 def _create_tf_record_from_coco_annotations(images_info_file,
-                                            image_dir,
+                                            image_dirs,
                                             output_path,
                                             num_shards,
                                             object_annotations_file=None,
...
@@ -309,7 +326,7 @@ def _create_tf_record_from_coco_annotations(images_info_file,
       files Eg. 'image_info_test-dev2017.json',
       'instance_annotations_train2017.json',
       'caption_annotations_train2017.json', etc.
-    image_dir: Directory containing the image files.
+    image_dirs: List of directories containing the image files.
     output_path: Path to output tf.Record file.
     num_shards: Number of output files to create.
     object_annotations_file: JSON file containing bounding box annotations.
...
@@ -333,7 +350,7 @@ def _create_tf_record_from_coco_annotations(images_info_file,
         _load_caption_annotations(caption_annotations_file))

   coco_annotations_iter = generate_annotations(
-      images, image_dir, img_to_obj_annotation, img_to_caption_annotation,
+      images, image_dirs, img_to_obj_annotation, img_to_caption_annotation,
       id_to_name_map=id_to_name_map, include_masks=include_masks)

   num_skipped = tfrecord_lib.write_tf_record_dataset(
...
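The multi-directory lookup added above can be exercised on its own. A hedged sketch of the same resolution rule, using plain `os.path` instead of `tf.io.gfile` and hypothetical directory names:

import os

def resolve_image_path(image_dirs, filename):
  """Mirrors the patched logic: the file must exist in exactly one directory."""
  if len(image_dirs) > 1:
    candidates = [os.path.join(d, filename) for d in image_dirs]
    existing = [p for p in candidates if os.path.exists(p)]
    if not existing:
      raise ValueError('{} does not exist across image directories.'.format(filename))
    if len(existing) > 1:
      raise ValueError('{} is not unique across image directories'.format(filename))
    return existing[0]
  # With a single directory, opening the file later gives a clear error anyway.
  image_dir, = image_dirs
  return os.path.join(image_dir, filename)

# Hypothetical call: COCO train and val images kept in two separate folders.
# resolve_image_path(['images/train2014', 'images/val2014'],
#                    'COCO_val2014_000000000042.jpg')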
official/vision/beta/data/process_coco_few_shot.sh (new file, +48, -0)

#!/bin/bash
#
# Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.

tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
output_dir="/tmp/coco_few_shot"

while getopts "o:" o; do
  case "${o}" in
    o)
      output_dir=${OPTARG}
      ;;
    *)
      echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1
      ;;
  esac
done

cocosplit_url="dl.yf.io/fs-det/datasets/cocosplit"
wget --recursive --no-parent -q --show-progress --progress=bar:force:noscroll \
    -P "${tmp_dir}" -A "5k.json,*10shot*.json,*30shot*.json" \
    "http://${cocosplit_url}/"
mv "${tmp_dir}/${cocosplit_url}/"* "${tmp_dir}"
rm -rf "${tmp_dir}/${cocosplit_url}/"

python process_coco_few_shot_json_files.py \
    --logtostderr --workdir="${tmp_dir}"

for seed in {0..9}; do
  for shots in 10 30; do
    python create_coco_tf_record.py \
        --logtostderr \
        --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
        --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
        --image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
        --object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
        --caption_annotations_file="" \
        --output_file_prefix="${output_dir}/${shots}shot_seed${seed}" \
        --num_shards=4
  done
done

python create_coco_tf_record.py \
    --logtostderr \
    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
    --image_info_file="${tmp_dir}/datasplit/5k.json" \
    --object_annotations_file="${tmp_dir}/datasplit/5k.json" \
    --caption_annotations_file="" \
    --output_file_prefix="${output_dir}/5k" \
    --num_shards=10

rm -rf "${tmp_dir}"
official/vision/beta/data/process_coco_few_shot_json_files.py (new file, +124, -0)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Processes the JSON files for COCO few-shot.

We assume that `workdir` mirrors the contents of
http://dl.yf.io/fs-det/datasets/cocosplit/, which contains the official JSON
files for the few-shot COCO evaluation procedure that Wang et al. (2020)'s
"Frustratingly Simple Few-Shot Object Detection" paper uses.
"""

import collections
import itertools
import json
import logging
import os

from absl import app
from absl import flags

import tensorflow as tf

logger = tf.get_logger()
logger.setLevel(logging.INFO)

flags.DEFINE_string('workdir', None, 'Working directory.')

FLAGS = flags.FLAGS

CATEGORIES = [
    'airplane', 'apple', 'backpack', 'banana', 'baseball bat',
    'baseball glove', 'bear', 'bed', 'bench', 'bicycle', 'bird', 'boat',
    'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot',
    'cat', 'cell phone', 'chair', 'clock', 'couch', 'cow', 'cup',
    'dining table', 'dog', 'donut', 'elephant', 'fire hydrant', 'fork',
    'frisbee', 'giraffe', 'hair drier', 'handbag', 'horse', 'hot dog',
    'keyboard', 'kite', 'knife', 'laptop', 'microwave', 'motorcycle', 'mouse',
    'orange', 'oven', 'parking meter', 'person', 'pizza', 'potted plant',
    'refrigerator', 'remote', 'sandwich', 'scissors', 'sheep', 'sink',
    'skateboard', 'skis', 'snowboard', 'spoon', 'sports ball', 'stop sign',
    'suitcase', 'surfboard', 'teddy bear', 'tennis racket', 'tie', 'toaster',
    'toilet', 'toothbrush', 'traffic light', 'train', 'truck', 'tv',
    'umbrella', 'vase', 'wine glass', 'zebra'
]
SEEDS = list(range(10))
SHOTS = [10, 30]

FILE_SUFFIXES = collections.defaultdict(list)
for _seed, _shots in itertools.product(SEEDS, SHOTS):
  for _category in CATEGORIES:
    FILE_SUFFIXES[(_seed, _shots)].append(
        '{}full_box_{}shot_{}_trainval.json'.format(
            # http://dl.yf.io/fs-det/datasets/cocosplit/ is organized like so:
            #
            #   datasplit/
            #     trainvalno5k.json
            #     5k.json
            #   full_box_{1,2,3,5,10,30}shot_{category}_trainval.json
            #   seed{1-9}/
            #     full_box_{1,2,3,5,10,30}shot_{category}_trainval.json
            #
            # This means that the JSON files for seed0 are located in the root
            # directory rather than in a `seed?/` subdirectory, hence the
            # conditional expression below.
            '' if _seed == 0 else 'seed{}/'.format(_seed), _shots, _category))


def main(unused_argv):
  workdir = FLAGS.workdir
  for seed, shots in itertools.product(SEEDS, SHOTS):
    # Retrieve all examples for a given seed and shots setting.
    file_paths = [
        os.path.join(workdir, suffix) for suffix in FILE_SUFFIXES[(seed, shots)]
    ]
    json_dicts = []
    for file_path in file_paths:
      with tf.io.gfile.GFile(file_path, 'r') as f:
        json_dicts.append(json.load(f))

    # Make sure that all JSON files for a given seed and shots setting have the
    # same metadata. We count on this to fuse them later on.
    metadata_dicts = [{'info': d['info'], 'licenses': d['licenses'],
                       'categories': d['categories']} for d in json_dicts]
    if not all(d == metadata_dicts[0] for d in metadata_dicts[1:]):
      raise RuntimeError(
          'JSON files for {} shots (seed {}) '.format(shots, seed) +
          'have different info, licences, or categories fields')

    # Retrieve images across all JSON files.
    images = sum((d['images'] for d in json_dicts), [])
    # Remove duplicate image entries.
    images = list({image['id']: image for image in images}.values())

    output_dict = {
        'info': json_dicts[0]['info'],
        'licenses': json_dicts[0]['licenses'],
        'categories': json_dicts[0]['categories'],
        'images': images,
        'annotations': sum((d['annotations'] for d in json_dicts), [])
    }
    output_path = os.path.join(workdir,
                               '{}shot_seed{}.json'.format(shots, seed))
    with tf.io.gfile.GFile(output_path, 'w') as f:
      json.dump(output_dict, f)
    logger.info('Processed %d shots (seed %d) and saved to %s', shots, seed,
                output_path)


if __name__ == '__main__':
  flags.mark_flag_as_required('workdir')
  app.run(main)
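The per-category JSON fusion above boils down to concatenating lists and de-duplicating images by id. A compact, self-contained sketch of that merge step with made-up toy dicts:

# Two toy COCO-style dicts that share one image; mirrors the fusion in main().
a = {'images': [{'id': 1}, {'id': 2}], 'annotations': [{'id': 10, 'image_id': 1}]}
b = {'images': [{'id': 2}, {'id': 3}], 'annotations': [{'id': 11, 'image_id': 3}]}
json_dicts = [a, b]

images = sum((d['images'] for d in json_dicts), [])
images = list({image['id']: image for image in images}.values())  # dedupe by id
annotations = sum((d['annotations'] for d in json_dicts), [])
print(len(images), len(annotations))  # 3 2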
official/vision/beta/projects/movinet/modeling/movinet.py (+11, -2)

@@ -525,7 +525,6 @@ class Movinet(tf.keras.Model):
     Returns:
       A dict mapping state names to state shapes.
     """
-
     def divide_resolution(shape, num_downsamples):
       """Downsamples the dimension to calculate strided convolution shape."""
       if shape is None:
...
@@ -564,6 +563,12 @@ class Movinet(tf.keras.Model):
         for layer_idx, layer in enumerate(params):
           expand_filters, kernel_size, strides = layer

+          # If we use a 2D kernel, we apply spatial downsampling
+          # before the buffer.
+          if (tuple(strides[1:3]) != (1, 1) and
+              self._conv_type in ['2plus1d', '3d_2plus1d']):
+            num_downsamples += 1
+
           if kernel_size[0] > 1:
             states[f'state/b{block_idx}/l{layer_idx}/stream_buffer'] = (
                 input_shape[0],
...
@@ -585,7 +590,11 @@ class Movinet(tf.keras.Model):
           if strides[1] != strides[2]:
             raise ValueError('Strides must match in the spatial dimensions, '
                              'got {}'.format(strides))
-          if strides[1] != 1 or strides[2] != 1:
+
+          # If we use a 3D kernel, we apply spatial downsampling
+          # after the buffer.
+          if (tuple(strides[1:3]) != (1, 1) and
+              self._conv_type not in ['2plus1d', '3d_2plus1d']):
             num_downsamples += 1
       elif isinstance(block, HeadSpec):
         states['state/head/pool_buffer'] = (
...
official/vision/beta/projects/movinet/modeling/movinet_layers.py (+21, -2)

@@ -633,9 +633,28 @@ class StreamConvBlock(ConvBlock):
     states = dict(states) if states is not None else {}
     x = inputs

-    if self._stream_buffer is not None:
+    # If we have no separate temporal conv, use the buffer before the 3D conv.
+    if self._conv_temporal is None and self._stream_buffer is not None:
       x, states = self._stream_buffer(x, states=states)

-    x = super(StreamConvBlock, self).call(x)
+    x = self._conv(x)
+    if self._batch_norm is not None:
+      x = self._batch_norm(x)
+    if self._activation_layer is not None:
+      x = self._activation_layer(x)
+
+    if self._conv_temporal is not None:
+      if self._stream_buffer is not None:
+        # If we have a separate temporal conv, use the buffer before the
+        # 1D conv instead (otherwise, we may waste computation on the 2D conv).
+        x, states = self._stream_buffer(x, states=states)
+
+      x = self._conv_temporal(x)
+      if self._batch_norm_temporal is not None:
+        x = self._batch_norm_temporal(x)
+      if self._activation_layer is not None:
+        x = self._activation_layer(x)

     return x, states
...
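The reworked call() places the stream buffer either before the 3D convolution or before the separate 1D temporal convolution, never both. A schematic of that control flow with stand-in callables rather than the real Keras layers, and with batch norm and activation omitted for brevity:

def stream_conv_block(x, states, conv, buffer=None, conv_temporal=None):
  """Mirrors the branching in the patched StreamConvBlock.call()."""
  if conv_temporal is None and buffer is not None:
    x, states = buffer(x, states)      # buffer feeds the 3D conv
  x = conv(x)
  if conv_temporal is not None:
    if buffer is not None:
      x, states = buffer(x, states)    # buffer feeds only the cheaper 1D conv
    x = conv_temporal(x)
  return x, states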
official/vision/beta/projects/movinet/modeling/movinet_model.py (+22, -6)

@@ -115,15 +115,31 @@ class MovinetClassifier(tf.keras.Model):
     inputs = {**states, 'image': image}

     if backbone.use_external_states:
-      before_states = set(states)
+      before_states = states
       endpoints, states = backbone(inputs)
-      after_states = set(states)
+      after_states = states

-      new_states = after_states - before_states
+      new_states = set(after_states) - set(before_states)
       if new_states:
-        raise AttributeError('Expected input and output states to be the same. '
-                             'Got extra states {}, expected {}'.format(
-                                 new_states, before_states))
+        raise ValueError(
+            'Expected input and output states to be the same. Got extra states '
+            '{}, expected {}'.format(new_states, set(before_states)))
+
+      mismatched_shapes = {}
+      for name in after_states:
+        before_shape = before_states[name].shape
+        after_shape = after_states[name].shape
+        if len(before_shape) != len(after_shape):
+          mismatched_shapes[name] = (before_shape, after_shape)
+          continue
+        for before, after in zip(before_shape, after_shape):
+          if before is not None and after is not None and before != after:
+            mismatched_shapes[name] = (before_shape, after_shape)
+            break
+      if mismatched_shapes:
+        raise ValueError(
+            'Got mismatched input and output state shapes: {}'.format(
+                mismatched_shapes))
     else:
       endpoints, states = backbone(inputs)
...
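The added check compares input and output state shapes dimension by dimension, treating `None` as a wildcard. A standalone sketch of that comparison, using plain tuples instead of Keras tensor shapes:

def find_mismatched_shapes(before_states, after_states):
  """Returns {name: (before, after)} for states whose shapes disagree."""
  mismatched = {}
  for name in after_states:
    before_shape, after_shape = before_states[name], after_states[name]
    if len(before_shape) != len(after_shape):
      mismatched[name] = (before_shape, after_shape)
      continue
    for before, after in zip(before_shape, after_shape):
      # None acts as "unknown", so it never counts as a mismatch.
      if before is not None and after is not None and before != after:
        mismatched[name] = (before_shape, after_shape)
        break
  return mismatched

print(find_mismatched_shapes({'buf': (1, None, 8)}, {'buf': (1, 4, 16)}))
# {'buf': ((1, None, 8), (1, 4, 16))}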
official/vision/beta/projects/yolo/README.md (+5, -0)

+DISCLAIMER: this YOLO implementation is still under development. No support will
+be provided during the development phase.

 # YOLO Object Detectors, You Only Look Once
 [](https://arxiv.org/abs/1804.02767)
...
@@ -74,3 +77,5 @@ head could be connected to a new, more powerful backbone if a person chose to.
+[](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
+[](https://www.python.org/downloads/release/python-380/)

(The badge image markup inside these links was stripped by the page capture; only the link targets survive.)
official/vision/beta/projects/yolo/configs/backbones.py (+7, -4)

@@ -24,11 +24,14 @@ from official.vision.beta.configs import backbones
 @dataclasses.dataclass
-class DarkNet(hyperparams.Config):
-  """DarkNet config."""
-  model_id: str = "darknet53"
+class Darknet(hyperparams.Config):
+  """Darknet config."""
+  model_id: str = 'darknet53'
+  width_scale: float = 1.0
+  depth_scale: float = 1.0
+  dilate: bool = False


 @dataclasses.dataclass
 class Backbone(backbones.Backbone):
-  darknet: DarkNet = DarkNet()
+  darknet: Darknet = Darknet()
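With the renamed dataclass, a YOLO backbone config can request width/depth scaling and dilation directly. A brief, hypothetical construction (field names follow the diff; the values are arbitrary, not recommended settings):

# Hypothetical values; width_scale, depth_scale and dilate are the new knobs.
backbone = Backbone(
    type='darknet',
    darknet=Darknet(model_id='cspdarknet53',
                    width_scale=0.75,
                    depth_scale=0.75,
                    dilate=True))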
official/vision/beta/projects/yolo/configs/darknet_classification.py (+1, -1)

@@ -32,7 +32,7 @@ class ImageClassificationModel(hyperparams.Config):
   num_classes: int = 0
   input_size: List[int] = dataclasses.field(default_factory=list)
   backbone: backbones.Backbone = backbones.Backbone(
-      type='darknet', resnet=backbones.DarkNet())
+      type='darknet', darknet=backbones.Darknet())
   dropout_rate: float = 0.0
   norm_activation: common.NormActivation = common.NormActivation()
   # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
...
official/vision/beta/projects/yolo/modeling/backbones/darknet.py
View file @
2b676a9b
...
...
@@ -13,7 +13,6 @@
# limitations under the License.
# Lint as: python3
"""Contains definitions of Darknet Backbone Networks.
The models are inspired by ResNet, and CSPNet
...
...
@@ -29,15 +28,15 @@ Cross Stage Partial networks (CSPNets) were proposed in:
arXiv:1911.11929
Dark
N
ets
A
re used mainly for
O
bject detection in:
Dark
n
ets
a
re used mainly for
o
bject detection in:
[1] Joseph Redmon, Ali Farhadi
YOLOv3: An Incremental Improvement. arXiv:1804.02767
[2] Alexey Bochkovskiy, Chien-Yao Wang, Hong-Yuan Mark Liao
YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv:2004.10934
"""
import
collections
import
collections
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
...
...
@@ -45,28 +44,32 @@ from official.vision.beta.modeling.backbones import factory
from
official.vision.beta.projects.yolo.modeling.layers
import
nn_blocks
class
BlockConfig
(
object
):
"""Get layer config to make code more readable.
Args:
layer: string layer name
stack: the type of layer ordering to use for this specific level
repetitions: integer for the number of times to repeat block
bottelneck: boolean for does this stack have a bottle neck layer
filters: integer for the output depth of the level
pool_size: integer the pool_size of max pool layers
kernel_size: optional integer, for convolution kernel size
strides: integer or tuple to indicate convolution strides
padding: the padding to apply to layers in this stack
activation: string for the activation to use for this stack
route: integer for what level to route from to get the next input
output_name: the name to use for this output
is_output: is this layer an output in the default model
"""
class
BlockConfig
:
"""Class to store layer config to make code more readable."""
def
__init__
(
self
,
layer
,
stack
,
reps
,
bottleneck
,
filters
,
pool_size
,
kernel_size
,
strides
,
padding
,
activation
,
route
,
output_name
,
is_output
):
kernel_size
,
strides
,
padding
,
activation
,
route
,
dilation_rate
,
output_name
,
is_output
):
"""Initializing method for BlockConfig.
Args:
layer: A `str` for layer name.
stack: A `str` for the type of layer ordering to use for this specific
level.
reps: An `int` for the number of times to repeat block.
bottleneck: A `bool` for whether this stack has a bottle neck layer.
filters: An `int` for the output depth of the level.
pool_size: An `int` for the pool_size of max pool layers.
kernel_size: An `int` for convolution kernel size.
strides: A `Union[int, tuple]` that indicates convolution strides.
padding: An `int` for the padding to apply to layers in this stack.
activation: A `str` for the activation to use for this stack.
route: An `int` for the level to route from to get the next input.
dilation_rate: An `int` for the scale used in dialated Darknet.
output_name: A `str` for the name to use for this output.
is_output: A `bool` for whether this layer is an output in the default
model.
"""
self
.
layer
=
layer
self
.
stack
=
stack
self
.
repetitions
=
reps
...
...
@@ -78,6 +81,7 @@ class BlockConfig(object):
self
.
padding
=
padding
self
.
activation
=
activation
self
.
route
=
route
self
.
dilation_rate
=
dilation_rate
self
.
output_name
=
output_name
self
.
is_output
=
is_output
...
...
@@ -89,41 +93,41 @@ def build_block_specs(config):
return
specs
class
Layer
Factory
(
object
)
:
"""
Class fo
r
q
ui
ck look up of default layer
s.
class
Layer
Builder
:
"""
Laye
r
b
ui
lder clas
s.
Used by darknet to connect, introduce or exit a level. Used in place of an if
con
dition or switch to make adding new layers easier and to reduce redundant
code.
Class for quick look up of default layers used by darknet to
con
nect, introduce or exit a level. Used in place of an if condition
or switch to make adding new layers easier and to reduce redundant
code.
"""
def
__init__
(
self
):
self
.
_layer_dict
=
{
"
ConvBN
"
:
(
nn_blocks
.
ConvBN
,
self
.
conv_bn_config_todict
),
"
MaxPool
"
:
(
tf
.
keras
.
layers
.
MaxPool2D
,
self
.
maxpool_config_todict
)
'
ConvBN
'
:
(
nn_blocks
.
ConvBN
,
self
.
conv_bn_config_todict
),
'
MaxPool
'
:
(
tf
.
keras
.
layers
.
MaxPool2D
,
self
.
maxpool_config_todict
)
}
def
conv_bn_config_todict
(
self
,
config
,
kwargs
):
dictvals
=
{
"
filters
"
:
config
.
filters
,
"
kernel_size
"
:
config
.
kernel_size
,
"
strides
"
:
config
.
strides
,
"
padding
"
:
config
.
padding
'
filters
'
:
config
.
filters
,
'
kernel_size
'
:
config
.
kernel_size
,
'
strides
'
:
config
.
strides
,
'
padding
'
:
config
.
padding
}
dictvals
.
update
(
kwargs
)
return
dictvals
def
darktiny_config_todict
(
self
,
config
,
kwargs
):
dictvals
=
{
"
filters
"
:
config
.
filters
,
"
strides
"
:
config
.
strides
}
dictvals
=
{
'
filters
'
:
config
.
filters
,
'
strides
'
:
config
.
strides
}
dictvals
.
update
(
kwargs
)
return
dictvals
def
maxpool_config_todict
(
self
,
config
,
kwargs
):
return
{
"
pool_size
"
:
config
.
pool_size
,
"
strides
"
:
config
.
strides
,
"
padding
"
:
config
.
padding
,
"
name
"
:
kwargs
[
"
name
"
]
'
pool_size
'
:
config
.
pool_size
,
'
strides
'
:
config
.
strides
,
'
padding
'
:
config
.
padding
,
'
name
'
:
kwargs
[
'
name
'
]
}
def
__call__
(
self
,
config
,
kwargs
):
...
...
@@ -134,90 +138,259 @@ class LayerFactory(object):
# model configs
LISTNAMES
=
[
"
default_layer_name
"
,
"
level_type
"
,
"
number_of_layers_in_level
"
,
"
bottleneck
"
,
"
filters
"
,
"
kernal_size
"
,
"
pool_size
"
,
"
strides
"
,
"
padding
"
,
"
default_activation
"
,
"
route
"
,
"
level/name
"
,
"
is_output
"
'
default_layer_name
'
,
'
level_type
'
,
'
number_of_layers_in_level
'
,
'
bottleneck
'
,
'
filters
'
,
'
kernal_size
'
,
'
pool_size
'
,
'
strides
'
,
'
padding
'
,
'
default_activation
'
,
'
route
'
,
'dilation'
,
'
level/name
'
,
'
is_output
'
]
# pylint: disable=line-too-long
CSPDARKNET53
=
{
"list_names"
:
LISTNAMES
,
"splits"
:
{
"backbone_split"
:
106
,
"neck_split"
:
138
},
"backbone"
:
[
[
"ConvBN"
,
None
,
1
,
False
,
32
,
None
,
3
,
1
,
"same"
,
"mish"
,
-
1
,
0
,
False
],
[
"DarkRes"
,
"csp"
,
1
,
True
,
64
,
None
,
None
,
None
,
None
,
"mish"
,
-
1
,
1
,
False
],
[
"DarkRes"
,
"csp"
,
2
,
False
,
128
,
None
,
None
,
None
,
None
,
"mish"
,
-
1
,
2
,
False
],
[
"DarkRes"
,
"csp"
,
8
,
False
,
256
,
None
,
None
,
None
,
None
,
"mish"
,
-
1
,
3
,
True
],
[
"DarkRes"
,
"csp"
,
8
,
False
,
512
,
None
,
None
,
None
,
None
,
"mish"
,
-
1
,
4
,
True
],
[
"DarkRes"
,
"csp"
,
4
,
False
,
1024
,
None
,
None
,
None
,
None
,
"mish"
,
-
1
,
5
,
True
],
'list_names'
:
LISTNAMES
,
'splits'
:
{
'backbone_split'
:
106
,
'neck_split'
:
132
},
'backbone'
:
[
[
'ConvBN'
,
None
,
1
,
False
,
32
,
None
,
3
,
1
,
'same'
,
'mish'
,
-
1
,
1
,
0
,
False
],
[
'DarkRes'
,
'csp'
,
1
,
True
,
64
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
1
,
False
],
[
'DarkRes'
,
'csp'
,
2
,
False
,
128
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
2
,
False
],
[
'DarkRes'
,
'csp'
,
8
,
False
,
256
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
3
,
True
],
[
'DarkRes'
,
'csp'
,
8
,
False
,
512
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
2
,
4
,
True
],
[
'DarkRes'
,
'csp'
,
4
,
False
,
1024
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
4
,
5
,
True
],
]
}
CSPADARKNET53
=
{
'list_names'
:
LISTNAMES
,
'splits'
:
{
'backbone_split'
:
100
,
'neck_split'
:
135
},
'backbone'
:
[
[
'ConvBN'
,
None
,
1
,
False
,
32
,
None
,
3
,
1
,
'same'
,
'mish'
,
-
1
,
1
,
0
,
False
],
[
'DarkRes'
,
'residual'
,
1
,
True
,
64
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
1
,
False
],
[
'DarkRes'
,
'csp'
,
2
,
False
,
128
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
2
,
False
],
[
'DarkRes'
,
'csp'
,
8
,
False
,
256
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
3
,
True
],
[
'DarkRes'
,
'csp'
,
8
,
False
,
512
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
2
,
4
,
True
],
[
'DarkRes'
,
'csp'
,
4
,
False
,
1024
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
4
,
5
,
True
],
]
}
LARGECSP53
=
{
'list_names'
:
LISTNAMES
,
'splits'
:
{
'backbone_split'
:
100
,
'neck_split'
:
135
},
'backbone'
:
[
[
'ConvBN'
,
None
,
1
,
False
,
32
,
None
,
3
,
1
,
'same'
,
'mish'
,
-
1
,
1
,
0
,
False
],
[
'DarkRes'
,
'csp'
,
1
,
True
,
64
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
1
,
False
],
[
'DarkRes'
,
'csp'
,
3
,
False
,
128
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
2
,
False
],
[
'DarkRes'
,
'csp'
,
15
,
False
,
256
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
1
,
3
,
True
],
[
'DarkRes'
,
'csp'
,
15
,
False
,
512
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
2
,
4
,
True
],
[
'DarkRes'
,
'csp'
,
7
,
False
,
1024
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
4
,
5
,
True
],
[
'DarkRes'
,
'csp'
,
7
,
False
,
1024
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
8
,
6
,
True
],
[
'DarkRes'
,
'csp'
,
7
,
False
,
1024
,
None
,
None
,
None
,
None
,
'mish'
,
-
1
,
16
,
7
,
True
],
]
}
DARKNET53
=
{
"list_names"
:
LISTNAMES
,
"splits"
:
{
"backbone_split"
:
76
},
"backbone"
:
[
[
"ConvBN"
,
None
,
1
,
False
,
32
,
None
,
3
,
1
,
"same"
,
"leaky"
,
-
1
,
0
,
False
],
[
"DarkRes"
,
"residual"
,
1
,
True
,
64
,
None
,
None
,
None
,
None
,
"leaky"
,
-
1
,
1
,
False
],
[
"DarkRes"
,
"residual"
,
2
,
False
,
128
,
None
,
None
,
None
,
None
,
"leaky"
,
-
1
,
2
,
False
],
[
"DarkRes"
,
"residual"
,
8
,
False
,
256
,
None
,
None
,
None
,
None
,
"leaky"
,
-
1
,
3
,
True
],
[
"DarkRes"
,
"residual"
,
8
,
False
,
512
,
None
,
None
,
None
,
None
,
"leaky"
,
-
1
,
4
,
True
],
[
"DarkRes"
,
"residual"
,
4
,
False
,
1024
,
None
,
None
,
None
,
None
,
"leaky"
,
-
1
,
5
,
True
],
'list_names'
:
LISTNAMES
,
'splits'
:
{
'backbone_split'
:
76
},
'backbone'
:
[
[
'ConvBN'
,
None
,
1
,
False
,
32
,
None
,
3
,
1
,
'same'
,
'leaky'
,
-
1
,
1
,
0
,
False
],
[
'DarkRes'
,
'residual'
,
1
,
True
,
64
,
None
,
None
,
None
,
None
,
'leaky'
,
-
1
,
1
,
1
,
False
],
[
'DarkRes'
,
'residual'
,
2
,
False
,
128
,
None
,
None
,
None
,
None
,
'leaky'
,
-
1
,
1
,
2
,
False
],
[
'DarkRes'
,
'residual'
,
8
,
False
,
256
,
None
,
None
,
None
,
None
,
'leaky'
,
-
1
,
1
,
3
,
True
],
[
'DarkRes'
,
'residual'
,
8
,
False
,
512
,
None
,
None
,
None
,
None
,
'leaky'
,
-
1
,
2
,
4
,
True
],
[
'DarkRes'
,
'residual'
,
4
,
False
,
1024
,
None
,
None
,
None
,
None
,
'leaky'
,
-
1
,
4
,
5
,
True
],
]
}
CSPDARKNETTINY
=
{
"list_names"
:
LISTNAMES
,
"splits"
:
{
"backbone_split"
:
28
},
"backbone"
:
[
[
"ConvBN"
,
None
,
1
,
False
,
32
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
0
,
False
],
[
"ConvBN"
,
None
,
1
,
False
,
64
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
1
,
False
],
[
"CSPTiny"
,
"csp_tiny"
,
1
,
False
,
64
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
2
,
False
],
[
"CSPTiny"
,
"csp_tiny"
,
1
,
False
,
128
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
3
,
False
],
[
"CSPTiny"
,
"csp_tiny"
,
1
,
False
,
256
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
4
,
True
],
[
"ConvBN"
,
None
,
1
,
False
,
512
,
None
,
3
,
1
,
"same"
,
"leaky"
,
-
1
,
5
,
True
],
'list_names'
:
LISTNAMES
,
'splits'
:
{
'backbone_split'
:
28
},
'backbone'
:
[
[
'ConvBN'
,
None
,
1
,
False
,
32
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
0
,
False
],
[
'ConvBN'
,
None
,
1
,
False
,
64
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
1
,
False
],
[
'CSPTiny'
,
'csp_tiny'
,
1
,
False
,
64
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
2
,
False
],
[
'CSPTiny'
,
'csp_tiny'
,
1
,
False
,
128
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
3
,
False
],
[
'CSPTiny'
,
'csp_tiny'
,
1
,
False
,
256
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
4
,
True
],
[
'ConvBN'
,
None
,
1
,
False
,
512
,
None
,
3
,
1
,
'same'
,
'leaky'
,
-
1
,
1
,
5
,
True
],
]
}
DARKNETTINY
=
{
"list_names"
:
LISTNAMES
,
"splits"
:
{
"backbone_split"
:
14
},
"backbone"
:
[
[
"ConvBN"
,
None
,
1
,
False
,
16
,
None
,
3
,
1
,
"same"
,
"leaky"
,
-
1
,
0
,
False
],
[
"DarkTiny"
,
"tiny"
,
1
,
True
,
32
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
1
,
False
],
[
"DarkTiny"
,
"tiny"
,
1
,
True
,
64
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
2
,
False
],
[
"DarkTiny"
,
"tiny"
,
1
,
False
,
128
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
3
,
False
],
[
"DarkTiny"
,
"tiny"
,
1
,
False
,
256
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
4
,
True
],
[
"DarkTiny"
,
"tiny"
,
1
,
False
,
512
,
None
,
3
,
2
,
"same"
,
"leaky"
,
-
1
,
5
,
False
],
[
"DarkTiny"
,
"tiny"
,
1
,
False
,
1024
,
None
,
3
,
1
,
"same"
,
"leaky"
,
-
1
,
5
,
True
],
'list_names'
:
LISTNAMES
,
'splits'
:
{
'backbone_split'
:
14
},
'backbone'
:
[
[
'ConvBN'
,
None
,
1
,
False
,
16
,
None
,
3
,
1
,
'same'
,
'leaky'
,
-
1
,
1
,
0
,
False
],
[
'DarkTiny'
,
'tiny'
,
1
,
True
,
32
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
1
,
False
],
[
'DarkTiny'
,
'tiny'
,
1
,
True
,
64
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
2
,
False
],
[
'DarkTiny'
,
'tiny'
,
1
,
False
,
128
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
3
,
False
],
[
'DarkTiny'
,
'tiny'
,
1
,
False
,
256
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
4
,
True
],
[
'DarkTiny'
,
'tiny'
,
1
,
False
,
512
,
None
,
3
,
2
,
'same'
,
'leaky'
,
-
1
,
1
,
5
,
False
],
[
'DarkTiny'
,
'tiny'
,
1
,
False
,
1024
,
None
,
3
,
1
,
'same'
,
'leaky'
,
-
1
,
1
,
5
,
True
],
]
}
# pylint: enable=line-too-long
BACKBONES
=
{
"darknettiny"
:
DARKNETTINY
,
"darknet53"
:
DARKNET53
,
"cspdarknet53"
:
CSPDARKNET53
,
"cspdarknettiny"
:
CSPDARKNETTINY
'darknettiny'
:
DARKNETTINY
,
'darknet53'
:
DARKNET53
,
'cspdarknet53'
:
CSPDARKNET53
,
'altered_cspdarknet53'
:
CSPADARKNET53
,
'cspdarknettiny'
:
CSPDARKNETTINY
,
'csp-large'
:
LARGECSP53
,
}
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
"
yolo
"
)
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'
yolo
'
)
class
Darknet
(
tf
.
keras
.
Model
):
"""Darknet backbone."""
"""
The
Darknet backbone
architecture
."""
def
__init__
(
self
,
model_id
=
"
darknet53
"
,
model_id
=
'
darknet53
'
,
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
min_level
=
None
,
max_level
=
5
,
width_scale
=
1.0
,
depth_scale
=
1.0
,
csp_level_mod
=
(),
activation
=
None
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
"glorot_uniform"
,
dilate
=
False
,
kernel_initializer
=
'glorot_uniform'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
...
...
@@ -227,12 +400,13 @@ class Darknet(tf.keras.Model):
self
.
_model_name
=
model_id
self
.
_splits
=
splits
self
.
_input_shape
=
input_specs
self
.
_registry
=
Layer
Factory
()
self
.
_registry
=
Layer
Builder
()
# default layer look up
self
.
_min_size
=
min_level
self
.
_max_size
=
max_level
self
.
_output_specs
=
None
self
.
_csp_level_mod
=
set
(
csp_level_mod
)
self
.
_kernel_initializer
=
kernel_initializer
self
.
_bias_regularizer
=
bias_regularizer
...
...
@@ -241,16 +415,20 @@ class Darknet(tf.keras.Model):
self
.
_use_sync_bn
=
use_sync_bn
self
.
_activation
=
activation
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_dilate
=
dilate
self
.
_width_scale
=
width_scale
self
.
_depth_scale
=
depth_scale
self
.
_default_dict
=
{
"kernel_initializer"
:
self
.
_kernel_initializer
,
"kernel_regularizer"
:
self
.
_kernel_regularizer
,
"bias_regularizer"
:
self
.
_bias_regularizer
,
"norm_momentum"
:
self
.
_norm_momentum
,
"norm_epsilon"
:
self
.
_norm_epislon
,
"use_sync_bn"
:
self
.
_use_sync_bn
,
"activation"
:
self
.
_activation
,
"name"
:
None
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epislon
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'activation'
:
self
.
_activation
,
'dilation_rate'
:
1
,
'name'
:
None
}
inputs
=
tf
.
keras
.
layers
.
Input
(
shape
=
self
.
_input_shape
.
shape
[
1
:])
...
...
@@ -273,33 +451,39 @@ class Darknet(tf.keras.Model):
endpoints
=
collections
.
OrderedDict
()
stack_outputs
=
[
inputs
]
for
i
,
config
in
enumerate
(
net
):
if
config
.
output_name
>
self
.
_max_size
:
break
if
config
.
output_name
in
self
.
_csp_level_mod
:
config
.
stack
=
'residual'
config
.
filters
=
int
(
config
.
filters
*
self
.
_width_scale
)
config
.
repetitions
=
int
(
config
.
repetitions
*
self
.
_depth_scale
)
if
config
.
stack
is
None
:
x
=
self
.
_build_block
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
"
{
config
.
layer
}
_
{
i
}
"
)
x
=
self
.
_build_block
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
'
{
config
.
layer
}
_
{
i
}
'
)
stack_outputs
.
append
(
x
)
elif
config
.
stack
==
"residual"
:
x
=
self
.
_residual_stack
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
"
{
config
.
layer
}
_
{
i
}
"
)
elif
config
.
stack
==
'residual'
:
x
=
self
.
_residual_stack
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
'
{
config
.
layer
}
_
{
i
}
'
)
stack_outputs
.
append
(
x
)
elif
config
.
stack
==
"csp"
:
x
=
self
.
_csp_stack
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
"
{
config
.
layer
}
_
{
i
}
"
)
elif
config
.
stack
==
'csp'
:
x
=
self
.
_csp_stack
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
'
{
config
.
layer
}
_
{
i
}
'
)
stack_outputs
.
append
(
x
)
elif
config
.
stack
==
"
csp_tiny
"
:
x_pass
,
x
=
self
.
_csp_tiny_stack
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
"
{
config
.
layer
}
_
{
i
}
"
)
elif
config
.
stack
==
'
csp_tiny
'
:
x_pass
,
x
=
self
.
_csp_tiny_stack
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
'
{
config
.
layer
}
_
{
i
}
'
)
stack_outputs
.
append
(
x_pass
)
elif
config
.
stack
==
"tiny"
:
x
=
self
.
_tiny_stack
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
"
{
config
.
layer
}
_
{
i
}
"
)
elif
config
.
stack
==
'tiny'
:
x
=
self
.
_tiny_stack
(
stack_outputs
[
config
.
route
],
config
,
name
=
f
'
{
config
.
layer
}
_
{
i
}
'
)
stack_outputs
.
append
(
x
)
if
(
config
.
is_output
and
self
.
_min_size
is
None
):
endpoints
[
str
(
config
.
output_name
)]
=
x
elif
self
.
_min_size
is
not
None
and
config
.
output_name
>=
self
.
_min_size
and
config
.
output_name
<=
self
.
_max_size
:
elif
(
self
.
_min_size
is
not
None
and
config
.
output_name
>=
self
.
_min_size
and
config
.
output_name
<=
self
.
_max_size
):
endpoints
[
str
(
config
.
output_name
)]
=
x
self
.
_output_specs
=
{
l
:
endpoints
[
l
].
get_shape
()
for
l
in
endpoints
.
keys
()}
...
...
@@ -308,8 +492,7 @@ class Darknet(tf.keras.Model):
def
_get_activation
(
self
,
activation
):
if
self
.
_activation
is
None
:
return
activation
else
:
return
self
.
_activation
return
self
.
_activation
def
_csp_stack
(
self
,
inputs
,
config
,
name
):
if
config
.
bottleneck
:
...
...
@@ -320,86 +503,135 @@ class Darknet(tf.keras.Model):
csp_filter_scale
=
2
residual_filter_scale
=
1
scale_filters
=
2
self
.
_default_dict
[
"activation"
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
"name"
]
=
f
"
{
name
}
_csp_down"
x
,
x_route
=
nn_blocks
.
CSPRoute
(
filters
=
config
.
filters
,
filter_scale
=
csp_filter_scale
,
downsample
=
True
,
**
self
.
_default_dict
)(
inputs
)
for
i
in
range
(
config
.
repetitions
):
self
.
_default_dict
[
"name"
]
=
f
"
{
name
}
_
{
i
}
"
x
=
nn_blocks
.
DarkResidual
(
filters
=
config
.
filters
//
scale_filters
,
filter_scale
=
residual_filter_scale
,
**
self
.
_default_dict
)(
x
)
self
.
_default_dict
[
"name"
]
=
f
"
{
name
}
_csp_connect"
output
=
nn_blocks
.
CSPConnect
(
filters
=
config
.
filters
,
filter_scale
=
csp_filter_scale
,
**
self
.
_default_dict
)([
x
,
x_route
])
self
.
_default_dict
[
"activation"
]
=
self
.
_activation
self
.
_default_dict
[
"name"
]
=
None
self
.
_default_dict
[
'activation'
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_csp_down'
if
self
.
_dilate
:
self
.
_default_dict
[
'dilation_rate'
]
=
config
.
dilation_rate
else
:
self
.
_default_dict
[
'dilation_rate'
]
=
1
# swap/add dilation
x
,
x_route
=
nn_blocks
.
CSPRoute
(
filters
=
config
.
filters
,
filter_scale
=
csp_filter_scale
,
downsample
=
True
,
**
self
.
_default_dict
)(
inputs
)
dilated_reps
=
config
.
repetitions
-
self
.
_default_dict
[
'dilation_rate'
]
//
2
for
i
in
range
(
dilated_reps
):
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_
{
i
}
'
x
=
nn_blocks
.
DarkResidual
(
filters
=
config
.
filters
//
scale_filters
,
filter_scale
=
residual_filter_scale
,
**
self
.
_default_dict
)(
x
)
for
i
in
range
(
dilated_reps
,
config
.
repetitions
):
self
.
_default_dict
[
'dilation_rate'
]
=
self
.
_default_dict
[
'dilation_rate'
]
//
2
self
.
_default_dict
[
'name'
]
=
f
"
{
name
}
_
{
i
}
_degridded_
{
self
.
_default_dict
[
'dilation_rate'
]
}
"
x
=
nn_blocks
.
DarkResidual
(
filters
=
config
.
filters
//
scale_filters
,
filter_scale
=
residual_filter_scale
,
**
self
.
_default_dict
)(
x
)
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_csp_connect'
output
=
nn_blocks
.
CSPConnect
(
filters
=
config
.
filters
,
filter_scale
=
csp_filter_scale
,
**
self
.
_default_dict
)([
x
,
x_route
])
self
.
_default_dict
[
'activation'
]
=
self
.
_activation
self
.
_default_dict
[
'name'
]
=
None
return
output
def
_csp_tiny_stack
(
self
,
inputs
,
config
,
name
):
self
.
_default_dict
[
"activation"
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
"name"
]
=
f
"
{
name
}
_csp_tiny"
x
,
x_route
=
nn_blocks
.
CSPTiny
(
filters
=
config
.
filters
,
**
self
.
_default_dict
)(
inputs
)
self
.
_default_dict
[
"activation"
]
=
self
.
_activation
self
.
_default_dict
[
"name"
]
=
None
self
.
_default_dict
[
'activation'
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_csp_tiny'
x
,
x_route
=
nn_blocks
.
CSPTiny
(
filters
=
config
.
filters
,
**
self
.
_default_dict
)(
inputs
)
self
.
_default_dict
[
'activation'
]
=
self
.
_activation
self
.
_default_dict
[
'name'
]
=
None
return
x
,
x_route
def
_tiny_stack
(
self
,
inputs
,
config
,
name
):
x
=
tf
.
keras
.
layers
.
MaxPool2D
(
pool_size
=
2
,
strides
=
config
.
strides
,
padding
=
"same"
,
data_format
=
None
,
name
=
f
"
{
name
}
_tiny/pool"
)(
inputs
)
self
.
_default_dict
[
"activation"
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
"name"
]
=
f
"
{
name
}
_tiny/conv"
x
=
tf
.
keras
.
layers
.
MaxPool2D
(
pool_size
=
2
,
strides
=
config
.
strides
,
padding
=
'same'
,
data_format
=
None
,
name
=
f
'
{
name
}
_tiny/pool'
)(
inputs
)
self
.
_default_dict
[
'activation'
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_tiny/conv'
x
=
nn_blocks
.
ConvBN
(
filters
=
config
.
filters
,
kernel_size
=
(
3
,
3
),
strides
=
(
1
,
1
),
padding
=
"
same
"
,
padding
=
'
same
'
,
**
self
.
_default_dict
)(
x
)
self
.
_default_dict
[
"
activation
"
]
=
self
.
_activation
self
.
_default_dict
[
"
name
"
]
=
None
self
.
_default_dict
[
'
activation
'
]
=
self
.
_activation
self
.
_default_dict
[
'
name
'
]
=
None
return
x
def
_residual_stack
(
self
,
inputs
,
config
,
name
):
self
.
_default_dict
[
"activation"
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
"name"
]
=
f
"
{
name
}
_residual_down"
x
=
nn_blocks
.
DarkResidual
(
filters
=
config
.
filters
,
downsample
=
True
,
**
self
.
_default_dict
)(
inputs
)
for
i
in
range
(
config
.
repetitions
-
1
):
self
.
_default_dict
[
"name"
]
=
f
"
{
name
}
_
{
i
}
"
x
=
nn_blocks
.
DarkResidual
(
filters
=
config
.
filters
,
**
self
.
_default_dict
)(
x
)
self
.
_default_dict
[
"activation"
]
=
self
.
_activation
self
.
_default_dict
[
"name"
]
=
None
self
.
_default_dict
[
'activation'
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_residual_down'
if
self
.
_dilate
:
self
.
_default_dict
[
'dilation_rate'
]
=
config
.
dilation_rate
if
config
.
repetitions
<
8
:
config
.
repetitions
+=
2
else
:
self
.
_default_dict
[
'dilation_rate'
]
=
1
x
=
nn_blocks
.
DarkResidual
(
filters
=
config
.
filters
,
downsample
=
True
,
**
self
.
_default_dict
)(
inputs
)
dilated_reps
=
config
.
repetitions
-
(
self
.
_default_dict
[
'dilation_rate'
]
//
2
)
-
1
for
i
in
range
(
dilated_reps
):
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_
{
i
}
'
x
=
nn_blocks
.
DarkResidual
(
filters
=
config
.
filters
,
**
self
.
_default_dict
)(
x
)
for
i
in
range
(
dilated_reps
,
config
.
repetitions
-
1
):
self
.
_default_dict
[
'dilation_rate'
]
=
self
.
_default_dict
[
'dilation_rate'
]
//
2
self
.
_default_dict
[
'name'
]
=
f
"
{
name
}
_
{
i
}
_degridded_
{
self
.
_default_dict
[
'dilation_rate'
]
}
"
x
=
nn_blocks
.
DarkResidual
(
filters
=
config
.
filters
,
**
self
.
_default_dict
)(
x
)
self
.
_default_dict
[
'activation'
]
=
self
.
_activation
self
.
_default_dict
[
'name'
]
=
None
self
.
_default_dict
[
'dilation_rate'
]
=
1
return
x
def
_build_block
(
self
,
inputs
,
config
,
name
):
x
=
inputs
i
=
0
self
.
_default_dict
[
"
activation
"
]
=
self
.
_get_activation
(
config
.
activation
)
self
.
_default_dict
[
'
activation
'
]
=
self
.
_get_activation
(
config
.
activation
)
while
i
<
config
.
repetitions
:
self
.
_default_dict
[
"
name
"
]
=
f
"
{
name
}
_
{
i
}
"
self
.
_default_dict
[
'
name
'
]
=
f
'
{
name
}
_
{
i
}
'
layer
=
self
.
_registry
(
config
,
self
.
_default_dict
)
x
=
layer
(
x
)
i
+=
1
self
.
_default_dict
[
"
activation
"
]
=
self
.
_activation
self
.
_default_dict
[
"
name
"
]
=
None
self
.
_default_dict
[
'
activation
'
]
=
self
.
_activation
self
.
_default_dict
[
'
name
'
]
=
None
return
x
@
staticmethod
def
get_model_config
(
name
):
name
=
name
.
lower
()
backbone
=
BACKBONES
[
name
][
"
backbone
"
]
splits
=
BACKBONES
[
name
][
"
splits
"
]
backbone
=
BACKBONES
[
name
][
'
backbone
'
]
splits
=
BACKBONES
[
name
][
'
splits
'
]
return
build_block_specs
(
backbone
),
splits
@
property
...
...
@@ -412,35 +644,41 @@ class Darknet(tf.keras.Model):

  def get_config(self):
    layer_config = {
        'model_id': self._model_name,
        'min_level': self._min_size,
        'max_level': self._max_size,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epislon,
        'use_sync_bn': self._use_sync_bn,
        'activation': self._activation,
    }
    return layer_config


@factory.register_backbone_builder('darknet')
def build_darknet(
    input_specs: tf.keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
  """Builds darknet."""
  backbone_cfg = backbone_config.get()
  model = Darknet(
      model_id=backbone_cfg.model_id,
      min_level=backbone_cfg.min_level,
      max_level=backbone_cfg.max_level,
      input_specs=input_specs,
      dilate=backbone_cfg.dilate,
      width_scale=backbone_cfg.width_scale,
      depth_scale=backbone_cfg.depth_scale,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  model.summary()
  return model
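
As a quick orientation for the changes below, here is a minimal sketch of how this backbone is exercised; it mirrors what the updated darknet_test.py that follows asserts, and the 224x224 input size is illustrative rather than taken from this commit:

    import tensorflow as tf

    from official.vision.beta.projects.yolo.modeling.backbones import darknet

    # Build the backbone and trace a dummy image through it; the result is a
    # dict of endpoint feature maps keyed by level ('3', '4', '5').
    backbone = darknet.Darknet(model_id='darknet53', min_level=3, max_level=5)
    inputs = tf.keras.Input(shape=(224, 224, 3), batch_size=1)
    endpoints = backbone(inputs)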
official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py
View file @ 2b676a9b
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
# Lint as: python3
"""Tests for
resnet
."""
"""Tests for
yolo
."""
from absl.testing import parameterized
import numpy as np
...
...
@@ -24,35 +24,48 @@ from tensorflow.python.distribute import strategy_combinations
from official.vision.beta.projects.yolo.modeling.backbones import darknet

class DarknetTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (224, 'darknet53', 2, 1, True),
      (224, 'darknettiny', 1, 2, False),
      (224, 'cspdarknettiny', 1, 1, False),
      (224, 'cspdarknet53', 2, 1, True),
  )
  def test_network_creation(self, input_size, model_id,
                            endpoint_filter_scale, scale_final, dilate):
"""Test creation of ResNet family models."""
tf
.
keras
.
backend
.
set_image_data_format
(
"
channels_last
"
)
tf
.
keras
.
backend
.
set_image_data_format
(
'
channels_last
'
)
    network = darknet.Darknet(
        model_id=model_id, min_level=3, max_level=5, dilate=dilate)
    self.assertEqual(network.model_id, model_id)
    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = network(inputs)

    if dilate:
      self.assertAllEqual([1, input_size / 2**3, input_size / 2**3,
                           128 * endpoint_filter_scale],
                          endpoints['3'].shape.as_list())
      self.assertAllEqual([1, input_size / 2**3, input_size / 2**3,
                           256 * endpoint_filter_scale],
                          endpoints['4'].shape.as_list())
      self.assertAllEqual([1, input_size / 2**3, input_size / 2**3,
                           512 * endpoint_filter_scale * scale_final],
                          endpoints['5'].shape.as_list())
    else:
      self.assertAllEqual([1, input_size / 2**3, input_size / 2**3,
                           128 * endpoint_filter_scale],
                          endpoints['3'].shape.as_list())
      self.assertAllEqual([1, input_size / 2**4, input_size / 2**4,
                           256 * endpoint_filter_scale],
                          endpoints['4'].shape.as_list())
      self.assertAllEqual([1, input_size / 2**5, input_size / 2**5,
                           512 * endpoint_filter_scale * scale_final],
                          endpoints['5'].shape.as_list())

  @combinations.generate(
      combinations.combine(
...
...
@@ -66,20 +79,20 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
"""Test for sync bn on TPU and GPU devices."""
    inputs = np.random.rand(1, 224, 224, 3)
    tf.keras.backend.set_image_data_format('channels_last')

    with strategy.scope():
      network = darknet.Darknet(model_id='darknet53', min_size=3, max_size=5)
      _ = network(inputs)

  @parameterized.parameters(1, 3, 4)
  def test_input_specs(self, input_dim):
"""Test different input feature dimensions."""
    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(
        shape=[None, None, None, input_dim])
    network = darknet.Darknet(
        model_id='darknet53', min_level=3, max_level=5, input_specs=input_specs)

    inputs = tf.keras.Input(shape=(224, 224, input_dim), batch_size=1)
    _ = network(inputs)
...
...
@@ -87,14 +100,14 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        model_id='darknet53',
        min_level=3,
        max_level=5,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
...
...
@@ -113,5 +126,5 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/yolo/modeling/decoders/__init__.py
0 → 100644
View file @ 2b676a9b
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
0 → 100644
View file @ 2b676a9b
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
import tensorflow as tf

from official.vision.beta.projects.yolo.modeling.layers import nn_blocks


@tf.keras.utils.register_keras_serializable(package='yolo')
class _IdentityRoute(tf.keras.layers.Layer):

  def call(self, inputs):
    return None, inputs


@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloFPN(tf.keras.layers.Layer):
  """YOLO Feature pyramid network."""

  def __init__(self,
               fpn_depth=4,
               use_spatial_attention=False,
               csp_stack=False,
               activation='leaky',
               fpn_filter_scale=1,
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
"""Yolo FPN initialization function (Yolo V4).
Args:
fpn_depth: `int`, number of layers to use in each FPN path
if you choose to use an FPN.
use_spatial_attention: `bool`, use the spatial attention module.
csp_stack: `bool`, CSPize the FPN.
activation: `str`, the activation function to use typically leaky or mish.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float`, normalization momentum for the moving average.
norm_epsilon: `float`, small float added to variance to avoid dividing by
zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
**kwargs: keyword arguments to be passed.
"""
    super().__init__(**kwargs)
    self._fpn_depth = fpn_depth
    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._use_spatial_attention = use_spatial_attention
    self._filter_scale = fpn_filter_scale
    self._csp_stack = csp_stack

    self._base_config = dict(
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        kernel_regularizer=self._kernel_regularizer,
        kernel_initializer=self._kernel_initializer,
        bias_regularizer=self._bias_regularizer,
        norm_epsilon=self._norm_epsilon,
        norm_momentum=self._norm_momentum)

  def get_raw_depths(self, minimum_depth, inputs):
    """Calculates the unscaled depths of the FPN branches.

    Args:
      minimum_depth (int): depth of the smallest branch of the FPN.
      inputs (dict): dictionary of the shape of input args as a dictionary of
        lists.

    Returns:
      The unscaled depths of the FPN branches.
    """
    depths = []
    for i in range(self._min_level, self._max_level + 1):
      depths.append(inputs[str(i)][-1] / self._filter_scale)
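
    # For instance, backbone endpoints with channel depths
    # {'3': 256, '4': 512, '5': 1024} and fpn_filter_scale = 1 (illustrative
    # values) give depths [256, 512, 1024]; the list is reversed below so the
    # largest level comes first.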
    return list(reversed(depths))

  def build(self, inputs):
    """Use config dictionary to generate all important attributes for head.

    Args:
      inputs: dictionary of the shape of input args as a dictionary of lists.
    """
    keys = [int(key) for key in inputs.keys()]
    self._min_level = min(keys)
    self._max_level = max(keys)
    self._min_depth = inputs[str(self._min_level)][-1]
    self._depths = self.get_raw_depths(self._min_depth, inputs)

    # directly connect to an input path and process it
    self.preprocessors = dict()
    # resample an input and merge it with the output of another path
    # in order to aggregate backbone outputs
    self.resamples = dict()

    # set of convolution layers and upsample layers that are used to
    # prepare the FPN processors for output
    for level, depth in zip(
        reversed(range(self._min_level, self._max_level + 1)), self._depths):
      if level == self._min_level:
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=depth // 2,
            inverted=True,
            upsample=True,
            drop_final=self._csp_stack == 0,
            upsample_size=2,
            **self._base_config)
        self.preprocessors[str(level)] = _IdentityRoute()
      elif level != self._max_level:
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=depth // 2,
            inverted=True,
            upsample=True,
            drop_final=False,
            upsample_size=2,
            **self._base_config)
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=depth,
            repetitions=self._fpn_depth - int(level == self._min_level),
            block_invert=True,
            insert_spp=False,
            csp_stack=self._csp_stack,
            **self._base_config)
      else:
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=depth,
            repetitions=self._fpn_depth + 1 * int(self._csp_stack == 0),
            insert_spp=True,
            block_invert=False,
            csp_stack=self._csp_stack,
            **self._base_config)

  def call(self, inputs):
    outputs = dict()
    layer_in = inputs[str(self._max_level)]
    for level in reversed(range(self._min_level, self._max_level + 1)):
      _, x = self.preprocessors[str(level)](layer_in)
      outputs[str(level)] = x
      if level > self._min_level:
        x_next = inputs[str(level - 1)]
        _, layer_in = self.resamples[str(level - 1)]([x_next, x])
    return outputs


@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloPAN(tf.keras.layers.Layer):
  """YOLO Path Aggregation Network."""

  def __init__(self,
               path_process_len=6,
               max_level_process_len=None,
               embed_spp=False,
               use_spatial_attention=False,
               csp_stack=False,
               activation='leaky',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               fpn_input=True,
               fpn_filter_scale=1.0,
               **kwargs):
"""Yolo Path Aggregation Network initialization function (Yolo V3 and V4).
Args:
path_process_len: `int`, number of layers ot use in each Decoder path.
max_level_process_len: `int`, number of layers ot use in the largest
processing path, or the backbones largest output if it is different.
embed_spp: `bool`, use the SPP found in the YoloV3 and V4 model.
use_spatial_attention: `bool`, use the spatial attention module.
csp_stack: `bool`, CSPize the FPN.
activation: `str`, the activation function to use typically leaky or mish.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float`, normalization omentum for the moving average.
norm_epsilon: `float`, small float added to variance to avoid dividing
by zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
fpn_input: `bool`, for whether the input into this fucntion is an FPN or
a backbone.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
**kwargs: keyword arguments to be passed.
"""
    super().__init__(**kwargs)

    self._path_process_len = path_process_len
    self._embed_spp = embed_spp
    self._use_spatial_attention = use_spatial_attention

    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._fpn_input = fpn_input
    self._max_level_process_len = max_level_process_len
    self._csp_stack = csp_stack
    self._fpn_filter_scale = fpn_filter_scale

    if max_level_process_len is None:
      self._max_level_process_len = path_process_len

    self._base_config = dict(
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        kernel_regularizer=self._kernel_regularizer,
        kernel_initializer=self._kernel_initializer,
        bias_regularizer=self._bias_regularizer,
        norm_epsilon=self._norm_epsilon,
        norm_momentum=self._norm_momentum)

  def build(self, inputs):
    """Use config dictionary to generate all important attributes for head.

    Args:
      inputs: dictionary of the shape of input args as a dictionary of lists.
    """
    # define the key order
    keys = [int(key) for key in inputs.keys()]
    self._min_level = min(keys)
    self._max_level = max(keys)
    self._min_depth = inputs[str(self._min_level)][-1]
    self._depths = self.get_raw_depths(self._min_depth, inputs)

    # directly connect to an input path and process it
    self.preprocessors = dict()
    # resample an input and merge it with the output of another path
    # in order to aggregate backbone outputs
    self.resamples = dict()

    # An FPN reverses the key process order relative to the backbone, so the
    # order in which objects are created and processed has to be adjusted for
    # this. Without an FPN the decoder connects directly to the backbone, so
    # objects need to be created from the largest to the smallest level.
    if self._fpn_input:
      # process order {... 3, 4, 5}
      self._iterator = range(self._min_level, self._max_level + 1)
      self._check = lambda x: x < self._max_level
      self._key_shift = lambda x: x + 1
      self._input = self._min_level
      downsample = True
      upsample = False
    else:
      # process order {5, 4, 3, ...}
      self._iterator = list(
          reversed(range(self._min_level, self._max_level + 1)))
      self._check = lambda x: x > self._min_level
      self._key_shift = lambda x: x - 1
      self._input = self._max_level
      downsample = False
      upsample = True

    if self._csp_stack == 0:
      proc_filters = lambda x: x
      resample_filters = lambda x: x // 2
    else:
      proc_filters = lambda x: x * 2
      resample_filters = lambda x: x

    for level, depth in zip(self._iterator, self._depths):
      if level == self._input:
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=proc_filters(depth),
            repetitions=self._max_level_process_len,
            insert_spp=self._embed_spp,
            block_invert=False,
            insert_sam=self._use_spatial_attention,
            csp_stack=self._csp_stack,
            **self._base_config)
      else:
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=resample_filters(depth),
            upsample=upsample,
            downsample=downsample,
            inverted=False,
            drop_final=self._csp_stack == 0,
            **self._base_config)
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=proc_filters(depth),
            repetitions=self._path_process_len,
            insert_spp=False,
            insert_sam=self._use_spatial_attention,
            csp_stack=self._csp_stack,
            **self._base_config)

  def get_raw_depths(self, minimum_depth, inputs):
    """Calculates the unscaled depths of the FPN branches.

    Args:
      minimum_depth: `int` depth of the smallest branch of the FPN.
      inputs: `dict[str, tf.InputSpec]` of the shape of input args as a
        dictionary of lists.

    Returns:
      The unscaled depths of the FPN branches.
    """
    depths = []
    if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1:
      for i in range(self._min_level, self._max_level + 1):
        depths.append(inputs[str(i)][-1] * 2)
    else:
      for _ in range(self._min_level, self._max_level + 1):
        depths.append(minimum_depth)
        minimum_depth *= 2
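
    # For example, a three-level input with minimum_depth = 256 and
    # fpn_filter_scale <= 1 (illustrative values) yields [256, 512, 1024];
    # with fpn_input=True this order (smallest level first) is kept below,
    # otherwise the list is reversed so iteration starts at the largest level.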
    if self._fpn_input:
      return depths
    return list(reversed(depths))

  def call(self, inputs):
    outputs = dict()
    layer_in = inputs[str(self._input)]

    for level in self._iterator:
      x_route, x = self.preprocessors[str(level)](layer_in)
      outputs[str(level)] = x
      if self._check(level):
        x_next = inputs[str(self._key_shift(level))]
        _, layer_in = self.resamples[str(
            self._key_shift(level))]([x_route, x_next])
    return outputs


@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloDecoder(tf.keras.Model):
  """Darknet Backbone Decoder."""

  def __init__(self,
               input_specs,
               use_fpn=False,
               use_spatial_attention=False,
               csp_stack=False,
               fpn_depth=4,
               fpn_filter_scale=1,
               path_process_len=6,
               max_level_process_len=None,
               embed_spp=False,
               activation='leaky',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
"""Yolo Decoder initialization function.
A unified model that ties all decoder components into a conditionally build
YOLO decoder.
Args:
input_specs: `dict[str, tf.InputSpec]`: input specs of each of the inputs
to the heads.
use_fpn: `bool`, use the FPN found in the YoloV4 model.
use_spatial_attention: `bool`, use the spatial attention module.
csp_stack: `bool`, CSPize the FPN.
fpn_depth: `int`, number of layers ot use in each FPN path
if you choose to use an FPN.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
path_process_len: `int`, number of layers ot use in each Decoder path.
max_level_process_len: `int`, number of layers ot use in the largest
processing path, or the backbones largest output if it is different.
embed_spp: `bool`, use the SPP found in the YoloV3 and V4 model.
activation: `str`, the activation function to use typically leaky or mish.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float`, normalization omentum for the moving average.
norm_epsilon: `float`, small float added to variance to avoid dividing by
zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
**kwargs: keyword arguments to be passed.
"""
    self._input_specs = input_specs
    self._use_fpn = use_fpn
    self._fpn_depth = fpn_depth
    self._path_process_len = path_process_len
    self._max_level_process_len = max_level_process_len
    self._embed_spp = embed_spp

    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    self._base_config = dict(
        use_spatial_attention=use_spatial_attention,
        csp_stack=csp_stack,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        fpn_filter_scale=fpn_filter_scale,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)

    self._decoder_config = dict(
        path_process_len=self._path_process_len,
        max_level_process_len=self._max_level_process_len,
        embed_spp=self._embed_spp,
        fpn_input=self._use_fpn,
        **self._base_config)

    inputs = {
        key: tf.keras.layers.Input(shape=value[1:])
        for key, value in input_specs.items()
    }
    if self._use_fpn:
      inter_outs = YoloFPN(
          fpn_depth=self._fpn_depth, **self._base_config)(inputs)
      outputs = YoloPAN(**self._decoder_config)(inter_outs)
    else:
      inter_outs = None
      outputs = YoloPAN(**self._decoder_config)(inputs)

    self._output_specs = {key: value.shape for key, value in outputs.items()}
    super().__init__(inputs=inputs, outputs=outputs, name='YoloDecoder')

  @property
  def use_fpn(self):
    return self._use_fpn

  @property
  def output_specs(self):
    return self._output_specs

  def get_config(self):
    config = dict(
        input_specs=self._input_specs,
        use_fpn=self._use_fpn,
        fpn_depth=self._fpn_depth,
        **self._decoder_config)
    return config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
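
A minimal usage sketch for the decoder above; the level keys and channel counts are illustrative assumptions (roughly a darknet53-style backbone at 416x416), not values taken from this commit:

    import tensorflow as tf

    from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder

    # Hypothetical backbone output specs, keyed by level.
    input_specs = {
        '3': [None, 52, 52, 256],
        '4': [None, 26, 26, 512],
        '5': [None, 13, 13, 1024],
    }
    decoder = yolo_decoder.YoloDecoder(input_specs, use_fpn=True, fpn_depth=4)

    # Feed dummy feature maps matching the specs; the result is a dict of
    # decoded features with the same level keys.
    features = {k: tf.ones([1] + v[1:]) for k, v in input_specs.items()}
    outputs = decoder(features)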