ModelZoo / ResNet50_tensorflow · Commits

Commit 2b676a9b, authored Jun 16, 2021 by Gunho Park

    Merge remote-tracking branch 'upstream/master'

Parents: 6ddd627a, bcbce005

Showing 20 of 28 changed files, with 1269 additions and 275 deletions.
Changed files (additions / deletions):

official/core/base_trainer.py (+3 / -2)
official/nlp/data/classifier_data_lib.py (+16 / -18)
official/nlp/data/sentence_prediction_dataloader.py (+7 / -5)
official/nlp/data/sentence_prediction_dataloader_test.py (+9 / -6)
official/nlp/projects/mobilebert/README.md (+1 / -1)
official/nlp/tasks/sentence_prediction.py (+10 / -6)
official/projects/README.md (+2 / -0)
official/vision/beta/data/create_coco_tf_record.py (+27 / -10)
official/vision/beta/data/process_coco_few_shot.sh (+48 / -0)
official/vision/beta/data/process_coco_few_shot_json_files.py (+124 / -0)
official/vision/beta/projects/movinet/modeling/movinet.py (+11 / -2)
official/vision/beta/projects/movinet/modeling/movinet_layers.py (+21 / -2)
official/vision/beta/projects/movinet/modeling/movinet_model.py (+22 / -6)
official/vision/beta/projects/yolo/README.md (+5 / -0)
official/vision/beta/projects/yolo/configs/backbones.py (+7 / -4)
official/vision/beta/projects/yolo/configs/darknet_classification.py (+1 / -1)
official/vision/beta/projects/yolo/modeling/backbones/darknet.py (+422 / -184)
official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py (+41 / -28)
official/vision/beta/projects/yolo/modeling/decoders/__init__.py (+14 / -0)
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py (+478 / -0)
official/core/base_trainer.py

@@ -246,10 +246,11 @@ class Trainer(_AsyncTrainer):
     self._train_loss = tf.keras.metrics.Mean("training_loss", dtype=tf.float32)
     self._validation_loss = tf.keras.metrics.Mean(
         "validation_loss", dtype=tf.float32)
+    model_metrics = model.metrics if hasattr(model, "metrics") else []
     self._train_metrics = self.task.build_metrics(
-        training=True) + self.model.metrics
+        training=True) + model_metrics
     self._validation_metrics = self.task.build_metrics(
-        training=False) + self.model.metrics
+        training=False) + model_metrics
     self.init_async()
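Note on the change above: not every object assigned as `model` here is a full Keras model with a `metrics` property, so the new `model_metrics` fallback avoids an `AttributeError`. A minimal, self-contained sketch of the pattern (hypothetical stand-in classes, plain Python):

# Minimal sketch of the hasattr guard used above (hypothetical classes).
class BareModule:
    """A model-like object with no `metrics` attribute."""

class KerasLikeModel:
    metrics = ["accuracy_metric"]  # stand-in for tf.keras metric objects

def collect_metrics(model, task_metrics):
    # Fall back to an empty list when the model exposes no metrics,
    # instead of raising AttributeError on `model.metrics`.
    model_metrics = model.metrics if hasattr(model, "metrics") else []
    return task_metrics + model_metrics

print(collect_metrics(BareModule(), ["task_loss"]))      # ['task_loss']
print(collect_metrics(KerasLikeModel(), ["task_loss"]))  # ['task_loss', 'accuracy_metric']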
official/nlp/data/classifier_data_lib.py

@@ -181,20 +181,21 @@ class AxProcessor(DataProcessor):
 class ColaProcessor(DataProcessor):
   """Processor for the CoLA data set (GLUE version)."""

   def __init__(self, process_text_fn=tokenization.convert_to_unicode):
     super(ColaProcessor, self).__init__(process_text_fn)
+    self.dataset = tfds.load("glue/cola", try_gcs=True)

   def get_train_examples(self, data_dir):
     """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
+    return self._create_examples_tfds("train")

   def get_dev_examples(self, data_dir):
     """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
+    return self._create_examples_tfds("validation")

   def get_test_examples(self, data_dir):
     """See base class."""
-    return self._create_examples(
-        self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
+    return self._create_examples_tfds("test")

   def get_labels(self):
     """See base class."""

@@ -205,22 +206,19 @@ class ColaProcessor(DataProcessor):
     """See base class."""
     return "COLA"

-  def _create_examples(self, lines, set_type):
+  def _create_examples_tfds(self, set_type):
     """Creates examples for the training/dev/test sets."""
+    dataset = self.dataset[set_type].as_numpy_iterator()
     examples = []
-    for i, line in enumerate(lines):
-      # Only the test set has a header.
-      if set_type == "test" and i == 0:
-        continue
+    for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
-      if set_type == "test":
-        text_a = self.process_text_fn(line[1])
-        label = "0"
-      else:
-        text_a = self.process_text_fn(line[3])
-        label = self.process_text_fn(line[1])
+      label = "0"
+      text_a = self.process_text_fn(example["sentence"])
+      if set_type != "test":
+        label = str(example["label"])
       examples.append(
-          InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
+          InputExample(
+              guid=guid, text_a=text_a, text_b=None, label=label, weight=None))
     return examples
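For context, the TFDS pipeline that the new `_create_examples_tfds` builds on can be exercised directly. A minimal sketch (requires `tensorflow-datasets`; downloads GLUE/CoLA on first run):

# Minimal sketch of reading GLUE/CoLA through TFDS, as the new
# _create_examples_tfds does.
import tensorflow_datasets as tfds

# Load GLUE/CoLA the same way the new ColaProcessor.__init__ does.
dataset = tfds.load("glue/cola", try_gcs=True)

# Iterate the validation split as numpy, as _create_examples_tfds does.
for i, example in enumerate(dataset["validation"].as_numpy_iterator()):
    text_a = example["sentence"].decode("utf-8")
    label = str(example["label"])
    print("validation-%s: label=%s text=%r" % (i, label, text_a))
    if i >= 2:  # show only a few rows
        break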
official/nlp/data/sentence_prediction_dataloader.py

@@ -40,6 +40,7 @@ class SentencePredictionDataConfig(cfg.DataConfig):
   label_type: str = 'int'
   # Whether to include the example id number.
   include_example_id: bool = False
+  label_field: str = 'label_ids'
   # Maps the key in TfExample to feature name.
   # E.g 'label_ids' to 'next_sentence_labels'
   label_name: Optional[Tuple[str, str]] = None

@@ -53,6 +54,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
     self._params = params
     self._seq_length = params.seq_length
     self._include_example_id = params.include_example_id
+    self._label_field = params.label_field
     if params.label_name:
       self._label_name_mapping = dict([params.label_name])
     else:

@@ -65,7 +67,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
         'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
         'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64),
         'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
-        'label_ids': tf.io.FixedLenFeature([], label_type),
+        self._label_field: tf.io.FixedLenFeature([], label_type),
     }
     if self._include_example_id:
       name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64)

@@ -92,10 +94,10 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
     if self._include_example_id:
       x['example_id'] = record['example_id']
-    x['label_ids'] = record['label_ids']
+    x[self._label_field] = record[self._label_field]

-    if 'label_ids' in self._label_name_mapping:
-      x[self._label_name_mapping['label_ids']] = record['label_ids']
+    if self._label_field in self._label_name_mapping:
+      x[self._label_name_mapping[self._label_field]] = record[self._label_field]

     return x

@@ -215,7 +217,7 @@ class SentencePredictionTextDataLoader(data_loader.DataLoader):
     model_inputs = self._text_processor(segments)
     if self._include_example_id:
       model_inputs['example_id'] = record['example_id']
-    model_inputs['label_ids'] = record[self._label_field]
+    model_inputs[self._label_field] = record[self._label_field]
     return model_inputs

   def _decode(self, record: tf.Tensor):
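The point of the new `label_field` option is that the tf.Example parse spec is keyed by a configurable name instead of a hard-coded 'label_ids'. A standalone sketch of the same pattern (TensorFlow only; the feature names are hypothetical):

# Minimal sketch: a parse spec keyed by a configurable label field,
# mirroring the change above (toy data, TensorFlow required).
import tensorflow as tf

def make_parse_fn(seq_length, label_field="label_ids", label_type=tf.int64):
    name_to_features = {
        "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
        label_field: tf.io.FixedLenFeature([], label_type),
    }
    def parse(serialized):
        return tf.io.parse_single_example(serialized, name_to_features)
    return parse

# Serialize a toy example under the non-default key 'next_sentence_labels'.
example = tf.train.Example(features=tf.train.Features(feature={
    "input_ids": tf.train.Feature(int64_list=tf.train.Int64List(value=[1, 2, 3])),
    "next_sentence_labels": tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
}))
parse = make_parse_fn(seq_length=3, label_field="next_sentence_labels")
print(parse(example.SerializeToString()))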
official/nlp/data/sentence_prediction_dataloader_test.py

@@ -197,13 +197,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
         vocab_file=vocab_file_path)
     dataset = loader.SentencePredictionTextDataLoader(data_config).load()
     features = next(iter(dataset))
+    label_field = data_config.label_field
     self.assertCountEqual(
-        ['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'],
+        ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
         features.keys())
     self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
-    self.assertEqual(features['label_ids'].shape, (batch_size,))
+    self.assertEqual(features[label_field].shape, (batch_size,))

   @parameterized.parameters(True, False)
   def test_python_sentencepiece_preprocessing(self, use_tfds):

@@ -231,13 +232,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
     )
     dataset = loader.SentencePredictionTextDataLoader(data_config).load()
     features = next(iter(dataset))
+    label_field = data_config.label_field
     self.assertCountEqual(
-        ['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'],
+        ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
         features.keys())
     self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
-    self.assertEqual(features['label_ids'].shape, (batch_size,))
+    self.assertEqual(features[label_field].shape, (batch_size,))

   @parameterized.parameters(True, False)
   def test_saved_model_preprocessing(self, use_tfds):

@@ -265,13 +267,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
     )
     dataset = loader.SentencePredictionTextDataLoader(data_config).load()
     features = next(iter(dataset))
+    label_field = data_config.label_field
     self.assertCountEqual(
-        ['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'],
+        ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
         features.keys())
     self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
     self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
-    self.assertEqual(features['label_ids'].shape, (batch_size,))
+    self.assertEqual(features[label_field].shape, (batch_size,))

 if __name__ == '__main__':
official/nlp/projects/mobilebert/README.md

@@ -22,7 +22,7 @@ modeling library:
 *   [mobile_bert_encoder.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/mobile_bert_encoder.py)
     contains `MobileBERTEncoder` implementation.
 *   [mobile_bert_layers.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/mobile_bert_layers.py)
-    contains `MobileBertEmbedding`, `MobileBertMaskedLM` and `MobileBertMaskedLM`
+    contains `MobileBertEmbedding`, `MobileBertTransformer` and `MobileBertMaskedLM`
     implementation.

 ## Pre-trained Models
official/nlp/tasks/sentence_prediction.py

@@ -69,6 +69,10 @@ class SentencePredictionTask(base_task.Task):
     if params.metric_type not in METRIC_TYPES:
       raise ValueError('Invalid metric_type: {}'.format(params.metric_type))
     self.metric_type = params.metric_type
+    if hasattr(params.train_data, 'label_field'):
+      self.label_field = params.train_data.label_field
+    else:
+      self.label_field = 'label_ids'

   def build_model(self):
     if self.task_config.hub_module_url and self.task_config.init_checkpoint:

@@ -95,7 +99,7 @@ class SentencePredictionTask(base_task.Task):
         use_encoder_pooler=self.task_config.model.use_encoder_pooler)

   def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
-    label_ids = labels['label_ids']
+    label_ids = labels[self.label_field]
     if self.task_config.model.num_classes == 1:
       loss = tf.keras.losses.mean_squared_error(label_ids, model_outputs)
     else:

@@ -121,7 +125,7 @@ class SentencePredictionTask(base_task.Task):
         y = tf.zeros((1,), dtype=tf.float32)
       else:
         y = tf.zeros((1, 1), dtype=tf.int32)
-      x['label_ids'] = y
+      x[self.label_field] = y
       return x

     dataset = tf.data.Dataset.range(1)

@@ -144,10 +148,10 @@ class SentencePredictionTask(base_task.Task):
   def process_metrics(self, metrics, labels, model_outputs):
     for metric in metrics:
-      metric.update_state(labels['label_ids'], model_outputs)
+      metric.update_state(labels[self.label_field], model_outputs)

   def process_compiled_metrics(self, compiled_metrics, labels, model_outputs):
-    compiled_metrics.update_state(labels, model_outputs)
+    compiled_metrics.update_state(labels[self.label_field], model_outputs)

   def validation_step(self, inputs, model: tf.keras.Model, metrics=None):
     if self.metric_type == 'accuracy':

@@ -163,12 +167,12 @@ class SentencePredictionTask(base_task.Task):
           'sentence_prediction':
               # Ensure one prediction along batch dimension.
               tf.expand_dims(tf.math.argmax(outputs, axis=1), axis=1),
-          'labels': labels['label_ids'],
+          'labels': labels[self.label_field],
       })
     if self.metric_type == 'pearson_spearman_corr':
       logs.update({
           'sentence_prediction': outputs,
-          'labels': labels['label_ids'],
+          'labels': labels[self.label_field],
       })
     return logs
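The `hasattr` check above keeps older configs, written before `label_field` existed, working unchanged. A minimal sketch of the fallback (hypothetical config classes; `getattr` with a default is the equivalent one-liner):

# Minimal sketch of the backward-compatible config read above
# (hypothetical config classes; the real ones are dataclass-style Configs).
class OldDataConfig:
    """Config written before label_field existed."""
    seq_length = 128

class NewDataConfig:
    seq_length = 128
    label_field = 'next_sentence_labels'

def resolve_label_field(train_data):
    # Same effect as: getattr(train_data, 'label_field', 'label_ids')
    if hasattr(train_data, 'label_field'):
        return train_data.label_field
    return 'label_ids'

print(resolve_label_field(OldDataConfig()))  # label_ids
print(resolve_label_field(NewDataConfig()))  # next_sentence_labels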
official/projects/README.md (new file)

This directory contains projects using TensorFlow Model Garden Modeling
libraries.
official/vision/beta/data/create_coco_tf_record.py

@@ -46,7 +46,7 @@ from official.vision.beta.data import tfrecord_lib
 flags.DEFINE_boolean(
     'include_masks', False, 'Whether to include instance segmentations masks '
     '(PNG encoded) in the result. default: False.')
-flags.DEFINE_string('image_dir', '', 'Directory containing images.')
+flags.DEFINE_multi_string('image_dir', '', 'Directory containing images.')
 flags.DEFINE_string(
     'image_info_file', '', 'File containing image information. '
     'Tf Examples in the output files correspond to the image '

@@ -159,7 +159,7 @@ def encode_caption_annotations(caption_annotations):
 def create_tf_example(image,
-                      image_dir,
+                      image_dirs,
                       bbox_annotations=None,
                       id_to_name_map=None,
                       caption_annotations=None,

@@ -169,7 +169,7 @@ def create_tf_example(image,
   Args:
     image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
       u'width', u'date_captured', u'flickr_url', u'id']
-    image_dir: directory containing the image files.
+    image_dirs: list of directories containing the image files.
     bbox_annotations:
       list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
       u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box

@@ -190,14 +190,31 @@ def create_tf_example(image,
     num_annotations_skipped: Number of (invalid) annotations that were ignored.

   Raises:
-    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+    ValueError: if the image pointed to by data['filename'] is not a valid JPEG,
+      does not exist, or is not unique across image directories.
   """
   image_height = image['height']
   image_width = image['width']
   filename = image['file_name']
   image_id = image['id']

-  full_path = os.path.join(image_dir, filename)
+  if len(image_dirs) > 1:
+    full_paths = [os.path.join(image_dir, filename) for image_dir in image_dirs]
+    full_existing_paths = [p for p in full_paths if tf.io.gfile.exists(p)]
+    if not full_existing_paths:
+      raise ValueError(
+          '{} does not exist across image directories.'.format(filename))
+    if len(full_existing_paths) > 1:
+      raise ValueError(
+          '{} is not unique across image directories'.format(filename))
+    full_path, = full_existing_paths
+  # If there is only one image directory, it's not worth checking for existence,
+  # since trying to open the file will raise an informative error message if it
+  # does not exist.
+  else:
+    image_dir, = image_dirs
+    full_path = os.path.join(image_dir, filename)
+
   with tf.io.gfile.GFile(full_path, 'rb') as fid:
     encoded_jpg = fid.read()

@@ -276,7 +293,7 @@ def _load_images_info(images_info_file):
   return info_dict['images']

-def generate_annotations(images, image_dir,
+def generate_annotations(images, image_dirs,
                          img_to_obj_annotation=None,
                          img_to_caption_annotation=None,
                          id_to_name_map=None,
                          include_masks=False):

@@ -289,12 +306,12 @@ def generate_annotations(images, image_dir,
     caption_annotaion = (img_to_caption_annotation.get(image['id'], None)
                          if img_to_caption_annotation else None)

-    yield (image, image_dir, object_annotation, id_to_name_map,
+    yield (image, image_dirs, object_annotation, id_to_name_map,
            caption_annotaion, include_masks)

 def _create_tf_record_from_coco_annotations(images_info_file,
-                                            image_dir,
+                                            image_dirs,
                                             output_path,
                                             num_shards,
                                             object_annotations_file=None,

@@ -309,7 +326,7 @@ def _create_tf_record_from_coco_annotations(images_info_file,
       files Eg. 'image_info_test-dev2017.json',
       'instance_annotations_train2017.json',
       'caption_annotations_train2017.json', etc.
-    image_dir: Directory containing the image files.
+    image_dirs: List of directories containing the image files.
     output_path: Path to output tf.Record file.
     num_shards: Number of output files to create.
     object_annotations_file: JSON file containing bounding box annotations.

@@ -333,7 +350,7 @@ def _create_tf_record_from_coco_annotations(images_info_file,
       _load_caption_annotations(caption_annotations_file))

   coco_annotations_iter = generate_annotations(
-      images, image_dir, img_to_obj_annotation, img_to_caption_annotation,
+      images, image_dirs, img_to_obj_annotation, img_to_caption_annotation,
       id_to_name_map=id_to_name_map, include_masks=include_masks)

   num_skipped = tfrecord_lib.write_tf_record_dataset(
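The uniqueness check added above can be exercised on its own. A minimal sketch using plain `os.path` in place of `tf.io.gfile` (directory names are hypothetical):

# Minimal sketch of resolving a filename across several image directories,
# mirroring the logic above (os.path.exists stands in for tf.io.gfile.exists).
import os

def resolve_image_path(image_dirs, filename):
    if len(image_dirs) > 1:
        full_paths = [os.path.join(d, filename) for d in image_dirs]
        existing = [p for p in full_paths if os.path.exists(p)]
        if not existing:
            raise ValueError(f'{filename} does not exist across image directories.')
        if len(existing) > 1:
            raise ValueError(f'{filename} is not unique across image directories')
        return existing[0]
    # With a single directory, opening the file later yields an informative
    # error on its own, so the existence check is skipped.
    image_dir, = image_dirs
    return os.path.join(image_dir, filename)

# Example (hypothetical paths): picks whichever split contains the file.
# resolve_image_path(['/data/train2014', '/data/val2014'], 'COCO_0001.jpg')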
official/vision/beta/data/process_coco_few_shot.sh (new file)

#!/bin/bash
#
# Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.

tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
output_dir="/tmp/coco_few_shot"
while getopts "o:" o; do
  case "${o}" in
    o)
      output_dir=${OPTARG}
      ;;
    *)
      echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1
      ;;
  esac
done

cocosplit_url="dl.yf.io/fs-det/datasets/cocosplit"
wget --recursive --no-parent -q --show-progress --progress=bar:force:noscroll \
    -P "${tmp_dir}" -A "5k.json,*10shot*.json,*30shot*.json" \
    "http://${cocosplit_url}/"
mv "${tmp_dir}/${cocosplit_url}/"* "${tmp_dir}"
rm -rf "${tmp_dir}/${cocosplit_url}/"

python process_coco_few_shot_json_files.py \
    --logtostderr --workdir="${tmp_dir}"

for seed in {0..9}; do
  for shots in 10 30; do
    python create_coco_tf_record.py \
        --logtostderr \
        --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
        --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
        --image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
        --object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
        --caption_annotations_file="" \
        --output_file_prefix="${output_dir}/${shots}shot_seed${seed}" \
        --num_shards=4
  done
done

python create_coco_tf_record.py \
    --logtostderr \
    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
    --image_info_file="${tmp_dir}/datasplit/5k.json" \
    --object_annotations_file="${tmp_dir}/datasplit/5k.json" \
    --caption_annotations_file="" \
    --output_file_prefix="${output_dir}/5k" \
    --num_shards=10

rm -rf "${tmp_dir}"
official/vision/beta/data/process_coco_few_shot_json_files.py (new file)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Processes the JSON files for COCO few-shot.

We assume that `workdir` mirrors the contents of
http://dl.yf.io/fs-det/datasets/cocosplit/, which contains the official JSON
files for the few-shot COCO evaluation procedure that Wang et al. (2020)'s
"Frustratingly Simple Few-Shot Object Detection" paper uses.
"""

import collections
import itertools
import json
import logging
import os

from absl import app
from absl import flags
import tensorflow as tf

logger = tf.get_logger()
logger.setLevel(logging.INFO)

flags.DEFINE_string('workdir', None, 'Working directory.')

FLAGS = flags.FLAGS

CATEGORIES = [
    'airplane', 'apple', 'backpack', 'banana', 'baseball bat',
    'baseball glove', 'bear', 'bed', 'bench', 'bicycle', 'bird', 'boat',
    'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot',
    'cat', 'cell phone', 'chair', 'clock', 'couch', 'cow', 'cup',
    'dining table', 'dog', 'donut', 'elephant', 'fire hydrant', 'fork',
    'frisbee', 'giraffe', 'hair drier', 'handbag', 'horse', 'hot dog',
    'keyboard', 'kite', 'knife', 'laptop', 'microwave', 'motorcycle',
    'mouse', 'orange', 'oven', 'parking meter', 'person', 'pizza',
    'potted plant', 'refrigerator', 'remote', 'sandwich', 'scissors',
    'sheep', 'sink', 'skateboard', 'skis', 'snowboard', 'spoon',
    'sports ball', 'stop sign', 'suitcase', 'surfboard', 'teddy bear',
    'tennis racket', 'tie', 'toaster', 'toilet', 'toothbrush',
    'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase',
    'wine glass', 'zebra'
]
SEEDS = list(range(10))
SHOTS = [10, 30]

FILE_SUFFIXES = collections.defaultdict(list)
for _seed, _shots in itertools.product(SEEDS, SHOTS):
  for _category in CATEGORIES:
    FILE_SUFFIXES[(_seed, _shots)].append(
        '{}full_box_{}shot_{}_trainval.json'.format(
            # http://dl.yf.io/fs-det/datasets/cocosplit/ is organized like so:
            #
            #   datasplit/
            #     trainvalno5k.json
            #     5k.json
            #   full_box_{1,2,3,5,10,30}shot_{category}_trainval.json
            #   seed{1-9}/
            #     full_box_{1,2,3,5,10,30}shot_{category}_trainval.json
            #
            # This means that the JSON files for seed0 are located in the root
            # directory rather than in a `seed?/` subdirectory, hence the
            # conditional expression below.
            '' if _seed == 0 else 'seed{}/'.format(_seed),
            _shots,
            _category))


def main(unused_argv):
  workdir = FLAGS.workdir
  for seed, shots in itertools.product(SEEDS, SHOTS):
    # Retrieve all examples for a given seed and shots setting.
    file_paths = [
        os.path.join(workdir, suffix) for suffix in FILE_SUFFIXES[(seed, shots)]
    ]
    json_dicts = []
    for file_path in file_paths:
      with tf.io.gfile.GFile(file_path, 'r') as f:
        json_dicts.append(json.load(f))

    # Make sure that all JSON files for a given seed and shots setting have the
    # same metadata. We count on this to fuse them later on.
    metadata_dicts = [{
        'info': d['info'],
        'licenses': d['licenses'],
        'categories': d['categories']
    } for d in json_dicts]
    if not all(d == metadata_dicts[0] for d in metadata_dicts[1:]):
      raise RuntimeError(
          'JSON files for {} shots (seed {}) '.format(shots, seed) +
          'have different info, licences, or categories fields')

    # Retrieve images across all JSON files.
    images = sum((d['images'] for d in json_dicts), [])
    # Remove duplicate image entries.
    images = list({image['id']: image for image in images}.values())

    output_dict = {
        'info': json_dicts[0]['info'],
        'licenses': json_dicts[0]['licenses'],
        'categories': json_dicts[0]['categories'],
        'images': images,
        'annotations': sum((d['annotations'] for d in json_dicts), [])
    }

    output_path = os.path.join(workdir,
                               '{}shot_seed{}.json'.format(shots, seed))
    with tf.io.gfile.GFile(output_path, 'w') as f:
      json.dump(output_dict, f)
    logger.info('Processed %d shots (seed %d) and saved to %s',
                shots, seed, output_path)


if __name__ == '__main__':
  flags.mark_flag_as_required('workdir')
  app.run(main)
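The duplicate-removal idiom in `main` is compact enough to miss: a dict comprehension keyed by `'id'` keeps one entry per image id (the last one seen), and `.values()` recovers the list. A minimal sketch with toy data:

# Minimal sketch of the dedup idiom used above (toy data).
images = [
    {'id': 1, 'file_name': 'a.jpg'},
    {'id': 2, 'file_name': 'b.jpg'},
    {'id': 1, 'file_name': 'a.jpg'},  # duplicate entry across JSON files
]
deduped = list({image['id']: image for image in images}.values())
print(deduped)  # one entry each for ids 1 and 2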
official/vision/beta/projects/movinet/modeling/movinet.py

@@ -525,7 +525,6 @@ class Movinet(tf.keras.Model):
     Returns:
       A dict mapping state names to state shapes.
     """
-
     def divide_resolution(shape, num_downsamples):
       """Downsamples the dimension to calculate strided convolution shape."""
       if shape is None:

@@ -564,6 +563,12 @@ class Movinet(tf.keras.Model):
       for layer_idx, layer in enumerate(params):
         expand_filters, kernel_size, strides = layer

+        # If we use a 2D kernel, we apply spatial downsampling
+        # before the buffer.
+        if (tuple(strides[1:3]) != (1, 1) and
+            self._conv_type in ['2plus1d', '3d_2plus1d']):
+          num_downsamples += 1
+
         if kernel_size[0] > 1:
           states[f'state/b{block_idx}/l{layer_idx}/stream_buffer'] = (
               input_shape[0],

@@ -585,7 +590,11 @@ class Movinet(tf.keras.Model):
         if strides[1] != strides[2]:
           raise ValueError('Strides must match in the spatial dimensions, '
                            'got {}'.format(strides))
-        if strides[1] != 1 or strides[2] != 1:
+
+        # If we use a 3D kernel, we apply spatial downsampling
+        # after the buffer.
+        if (tuple(strides[1:3]) != (1, 1) and
+            self._conv_type not in ['2plus1d', '3d_2plus1d']):
           num_downsamples += 1

     elif isinstance(block, HeadSpec):
       states['state/head/pool_buffer'] = (
official/vision/beta/projects/movinet/modeling/movinet_layers.py

@@ -633,9 +633,28 @@ class StreamConvBlock(ConvBlock):
     states = dict(states) if states is not None else {}
     x = inputs

-    if self._stream_buffer is not None:
+    # If we have no separate temporal conv, use the buffer before the 3D conv.
+    if self._conv_temporal is None and self._stream_buffer is not None:
       x, states = self._stream_buffer(x, states=states)

-    x = super(StreamConvBlock, self).call(x)
+    x = self._conv(x)
+    if self._batch_norm is not None:
+      x = self._batch_norm(x)
+    if self._activation_layer is not None:
+      x = self._activation_layer(x)
+
+    if self._conv_temporal is not None:
+      if self._stream_buffer is not None:
+        # If we have a separate temporal conv, use the buffer before the
+        # 1D conv instead (otherwise, we may waste computation on the 2D conv).
+        x, states = self._stream_buffer(x, states=states)
+
+      x = self._conv_temporal(x)
+      if self._batch_norm_temporal is not None:
+        x = self._batch_norm_temporal(x)
+      if self._activation_layer is not None:
+        x = self._activation_layer(x)

     return x, states
official/vision/beta/projects/movinet/modeling/movinet_model.py

@@ -115,15 +115,31 @@ class MovinetClassifier(tf.keras.Model):
     inputs = {**states, 'image': image}

     if backbone.use_external_states:
-      before_states = set(states)
+      before_states = states
       endpoints, states = backbone(inputs)
-      after_states = set(states)
+      after_states = states

-      new_states = after_states - before_states
+      new_states = set(after_states) - set(before_states)
       if new_states:
-        raise AttributeError('Expected input and output states to be the same. '
-                             'Got extra states {}, expected {}'.format(
-                                 new_states, before_states))
+        raise ValueError(
+            'Expected input and output states to be the same. Got extra states '
+            '{}, expected {}'.format(new_states, set(before_states)))
+
+      mismatched_shapes = {}
+      for name in after_states:
+        before_shape = before_states[name].shape
+        after_shape = after_states[name].shape
+        if len(before_shape) != len(after_shape):
+          mismatched_shapes[name] = (before_shape, after_shape)
+          continue
+        for before, after in zip(before_shape, after_shape):
+          if before is not None and after is not None and before != after:
+            mismatched_shapes[name] = (before_shape, after_shape)
+            break
+      if mismatched_shapes:
+        raise ValueError(
+            'Got mismatched input and output state shapes: {}'.format(
+                mismatched_shapes))
     else:
       endpoints, states = backbone(inputs)
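The shape check added above treats `None` dimensions (unknown at graph-build time) as wildcards that match anything. A minimal standalone sketch of that comparison, with plain tuples standing in for `tf.TensorShape`:

# Minimal sketch of the state-shape comparison above: None dimensions
# are treated as compatible with any concrete size.
def shapes_mismatch(before_shape, after_shape):
    if len(before_shape) != len(after_shape):
        return True
    return any(
        before is not None and after is not None and before != after
        for before, after in zip(before_shape, after_shape))

print(shapes_mismatch((1, None, 8, 8, 3), (1, 4, 8, 8, 3)))  # False: None matches 4
print(shapes_mismatch((1, 4, 8, 8, 3), (1, 4, 16, 16, 3)))   # True: 8 != 16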
official/vision/beta/projects/yolo/README.md
View file @
2b676a9b
DISCLAIMER: this YOLO implementation is still under development. No support will
be provided during the development phase.
# YOLO Object Detectors, You Only Look Once
[

](https://arxiv.org/abs/1804.02767)
...
...
@@ -74,3 +77,5 @@ head could be connected to a new, more powerful backbone if a person chose to.
[

](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
[

](https://www.python.org/downloads/release/python-380/)
official/vision/beta/projects/yolo/configs/backbones.py

@@ -24,11 +24,14 @@ from official.vision.beta.configs import backbones
 @dataclasses.dataclass
-class DarkNet(hyperparams.Config):
-  """DarkNet config."""
-  model_id: str = "darknet53"
+class Darknet(hyperparams.Config):
+  """Darknet config."""
+  model_id: str = 'darknet53'
+  width_scale: float = 1.0
+  depth_scale: float = 1.0
+  dilate: bool = False

 @dataclasses.dataclass
 class Backbone(backbones.Backbone):
-  darknet: DarkNet = DarkNet()
+  darknet: Darknet = Darknet()
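Since `hyperparams.Config` is dataclass-based, the renamed config with its new scaling fields can be sketched standalone, using `dataclasses` directly in place of `hyperparams.Config`:

# Minimal sketch of the renamed Darknet config (plain dataclass stands in
# for hyperparams.Config, which is dataclass-based).
import dataclasses

@dataclasses.dataclass
class Darknet:
    """Darknet config."""
    model_id: str = 'darknet53'
    width_scale: float = 1.0  # new: scales filter counts
    depth_scale: float = 1.0  # new: scales layer repetitions
    dilate: bool = False      # new: keep spatial resolution at deeper levels
                              # (see the dilate branch in darknet_test below)

# Example: derive a half-width backbone config.
cfg = dataclasses.replace(Darknet(), width_scale=0.5)
print(cfg)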
official/vision/beta/projects/yolo/configs/darknet_classification.py

@@ -32,7 +32,7 @@ class ImageClassificationModel(hyperparams.Config):
   num_classes: int = 0
   input_size: List[int] = dataclasses.field(default_factory=list)
   backbone: backbones.Backbone = backbones.Backbone(
-      type='darknet', resnet=backbones.DarkNet())
+      type='darknet', darknet=backbones.Darknet())
   dropout_rate: float = 0.0
   norm_activation: common.NormActivation = common.NormActivation()
   # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
official/vision/beta/projects/yolo/modeling/backbones/darknet.py

(Diff collapsed on the original page; +422 / -184, not shown here.)
official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py

@@ -13,7 +13,7 @@
 # limitations under the License.

 # Lint as: python3
-"""Tests for resnet."""
+"""Tests for yolo."""

 from absl.testing import parameterized
 import numpy as np

@@ -24,35 +24,48 @@ from tensorflow.python.distribute import strategy_combinations
 from official.vision.beta.projects.yolo.modeling.backbones import darknet

-class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
+class DarknetTest(parameterized.TestCase, tf.test.TestCase):

   @parameterized.parameters(
-      (224, "darknet53", 2, 1),
-      (224, "darknettiny", 1, 2),
-      (224, "cspdarknettiny", 1, 1),
-      (224, "cspdarknet53", 2, 1),
+      (224, 'darknet53', 2, 1, True),
+      (224, 'darknettiny', 1, 2, False),
+      (224, 'cspdarknettiny', 1, 1, False),
+      (224, 'cspdarknet53', 2, 1, True),
   )
-  def test_network_creation(self, input_size, model_id,
-                            endpoint_filter_scale, scale_final):
+  def test_network_creation(self, input_size, model_id,
+                            endpoint_filter_scale, scale_final, dilate):
     """Test creation of ResNet family models."""
-    tf.keras.backend.set_image_data_format("channels_last")
-    network = darknet.Darknet(model_id=model_id, min_level=3, max_level=5)
+    tf.keras.backend.set_image_data_format('channels_last')
+    network = darknet.Darknet(
+        model_id=model_id, min_level=3, max_level=5, dilate=dilate)
     self.assertEqual(network.model_id, model_id)
     inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
     endpoints = network(inputs)

-    self.assertAllEqual([
-        1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale
-    ], endpoints["3"].shape.as_list())
-    self.assertAllEqual([
-        1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale
-    ], endpoints["4"].shape.as_list())
-    self.assertAllEqual([
-        1, input_size / 2**5, input_size / 2**5,
-        512 * endpoint_filter_scale * scale_final
-    ], endpoints["5"].shape.as_list())
+    if dilate:
+      self.assertAllEqual([
+          1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale
+      ], endpoints['3'].shape.as_list())
+      self.assertAllEqual([
+          1, input_size / 2**3, input_size / 2**3, 256 * endpoint_filter_scale
+      ], endpoints['4'].shape.as_list())
+      self.assertAllEqual([
+          1, input_size / 2**3, input_size / 2**3,
+          512 * endpoint_filter_scale * scale_final
+      ], endpoints['5'].shape.as_list())
+    else:
+      self.assertAllEqual([
+          1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale
+      ], endpoints['3'].shape.as_list())
+      self.assertAllEqual([
+          1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale
+      ], endpoints['4'].shape.as_list())
+      self.assertAllEqual([
+          1, input_size / 2**5, input_size / 2**5,
+          512 * endpoint_filter_scale * scale_final
+      ], endpoints['5'].shape.as_list())

   @combinations.generate(
       combinations.combine(

@@ -66,20 +79,20 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
     """Test for sync bn on TPU and GPU devices."""
     inputs = np.random.rand(1, 224, 224, 3)

-    tf.keras.backend.set_image_data_format("channels_last")
+    tf.keras.backend.set_image_data_format('channels_last')

     with strategy.scope():
-      network = darknet.Darknet(model_id="darknet53", min_size=3, max_size=5)
+      network = darknet.Darknet(model_id='darknet53', min_size=3, max_size=5)
       _ = network(inputs)

   @parameterized.parameters(1, 3, 4)
   def test_input_specs(self, input_dim):
     """Test different input feature dimensions."""
-    tf.keras.backend.set_image_data_format("channels_last")
+    tf.keras.backend.set_image_data_format('channels_last')

     input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
     network = darknet.Darknet(
-        model_id="darknet53", min_level=3, max_level=5, input_specs=input_specs)
+        model_id='darknet53', min_level=3, max_level=5, input_specs=input_specs)

     inputs = tf.keras.Input(shape=(224, 224, input_dim), batch_size=1)
     _ = network(inputs)

@@ -87,14 +100,14 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
   def test_serialize_deserialize(self):
     # Create a network object that sets all of its config options.
     kwargs = dict(
-        model_id="darknet53",
+        model_id='darknet53',
         min_level=3,
         max_level=5,
         use_sync_bn=False,
-        activation="relu",
+        activation='relu',
         norm_momentum=0.99,
         norm_epsilon=0.001,
-        kernel_initializer="VarianceScaling",
+        kernel_initializer='VarianceScaling',
         kernel_regularizer=None,
         bias_regularizer=None,
     )

@@ -113,5 +126,5 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
     self.assertAllEqual(network.get_config(), new_network.get_config())

-if __name__ == "__main__":
+if __name__ == '__main__':
   tf.test.main()
official/vision/beta/projects/yolo/modeling/decoders/__init__.py (new file)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py (new file)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""

import tensorflow as tf

from official.vision.beta.projects.yolo.modeling.layers import nn_blocks


@tf.keras.utils.register_keras_serializable(package='yolo')
class _IdentityRoute(tf.keras.layers.Layer):

  def call(self, inputs):
    return None, inputs


@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloFPN(tf.keras.layers.Layer):
  """YOLO Feature pyramid network."""

  def __init__(self,
               fpn_depth=4,
               use_spatial_attention=False,
               csp_stack=False,
               activation='leaky',
               fpn_filter_scale=1,
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
    """Yolo FPN initialization function (Yolo V4).

    Args:
      fpn_depth: `int`, number of layers to use in each FPN path if you choose
        to use an FPN.
      use_spatial_attention: `bool`, use the spatial attention module.
      csp_stack: `bool`, CSPize the FPN.
      activation: `str`, the activation function to use, typically leaky or
        mish.
      fpn_filter_scale: `int`, scaling factor for the FPN filters.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float`, normalization momentum for the moving average.
      norm_epsilon: `float`, small float added to variance to avoid dividing by
        zero.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._fpn_depth = fpn_depth

    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._use_spatial_attention = use_spatial_attention
    self._filter_scale = fpn_filter_scale
    self._csp_stack = csp_stack

    self._base_config = dict(
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        kernel_regularizer=self._kernel_regularizer,
        kernel_initializer=self._kernel_initializer,
        bias_regularizer=self._bias_regularizer,
        norm_epsilon=self._norm_epsilon,
        norm_momentum=self._norm_momentum)

  def get_raw_depths(self, minimum_depth, inputs):
    """Calculates the unscaled depths of the FPN branches.

    Args:
      minimum_depth (int): depth of the smallest branch of the FPN.
      inputs (dict): dictionary of the shape of input args as a dictionary of
        lists.

    Returns:
      The unscaled depths of the FPN branches.
    """
    depths = []
    for i in range(self._min_level, self._max_level + 1):
      depths.append(inputs[str(i)][-1] / self._filter_scale)
    return list(reversed(depths))

  def build(self, inputs):
    """Use config dictionary to generate all important attributes for head.

    Args:
      inputs: dictionary of the shape of input args as a dictionary of lists.
    """
    keys = [int(key) for key in inputs.keys()]
    self._min_level = min(keys)
    self._max_level = max(keys)
    self._min_depth = inputs[str(self._min_level)][-1]
    self._depths = self.get_raw_depths(self._min_depth, inputs)

    # directly connect to an input path and process it
    self.preprocessors = dict()
    # resample an input and merge it with the output of another path
    # in order to aggregate backbone outputs
    self.resamples = dict()
    # set of convolution layers and upsample layers that are used to
    # prepare the FPN processors for output

    for level, depth in zip(
        reversed(range(self._min_level, self._max_level + 1)), self._depths):
      if level == self._min_level:
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=depth // 2,
            inverted=True,
            upsample=True,
            drop_final=self._csp_stack == 0,
            upsample_size=2,
            **self._base_config)
        self.preprocessors[str(level)] = _IdentityRoute()
      elif level != self._max_level:
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=depth // 2,
            inverted=True,
            upsample=True,
            drop_final=False,
            upsample_size=2,
            **self._base_config)
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=depth,
            repetitions=self._fpn_depth - int(level == self._min_level),
            block_invert=True,
            insert_spp=False,
            csp_stack=self._csp_stack,
            **self._base_config)
      else:
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=depth,
            repetitions=self._fpn_depth + 1 * int(self._csp_stack == 0),
            insert_spp=True,
            block_invert=False,
            csp_stack=self._csp_stack,
            **self._base_config)

  def call(self, inputs):
    outputs = dict()
    layer_in = inputs[str(self._max_level)]
    for level in reversed(range(self._min_level, self._max_level + 1)):
      _, x = self.preprocessors[str(level)](layer_in)
      outputs[str(level)] = x
      if level > self._min_level:
        x_next = inputs[str(level - 1)]
        _, layer_in = self.resamples[str(level - 1)]([x_next, x])
    return outputs


@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloPAN(tf.keras.layers.Layer):
  """YOLO Path Aggregation Network."""

  def __init__(self,
               path_process_len=6,
               max_level_process_len=None,
               embed_spp=False,
               use_spatial_attention=False,
               csp_stack=False,
               activation='leaky',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               fpn_input=True,
               fpn_filter_scale=1.0,
               **kwargs):
    """Yolo Path Aggregation Network initialization function (Yolo V3 and V4).

    Args:
      path_process_len: `int`, number of layers to use in each Decoder path.
      max_level_process_len: `int`, number of layers to use in the largest
        processing path, or the backbone's largest output if it is different.
      embed_spp: `bool`, use the SPP found in the YoloV3 and V4 model.
      use_spatial_attention: `bool`, use the spatial attention module.
      csp_stack: `bool`, CSPize the FPN.
      activation: `str`, the activation function to use, typically leaky or
        mish.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float`, normalization momentum for the moving average.
      norm_epsilon: `float`, small float added to variance to avoid dividing
        by zero.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      fpn_input: `bool`, whether the input into this function is an FPN or a
        backbone.
      fpn_filter_scale: `int`, scaling factor for the FPN filters.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._path_process_len = path_process_len
    self._embed_spp = embed_spp
    self._use_spatial_attention = use_spatial_attention

    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._fpn_input = fpn_input
    self._max_level_process_len = max_level_process_len
    self._csp_stack = csp_stack
    self._fpn_filter_scale = fpn_filter_scale

    if max_level_process_len is None:
      self._max_level_process_len = path_process_len

    self._base_config = dict(
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        kernel_regularizer=self._kernel_regularizer,
        kernel_initializer=self._kernel_initializer,
        bias_regularizer=self._bias_regularizer,
        norm_epsilon=self._norm_epsilon,
        norm_momentum=self._norm_momentum)

  def build(self, inputs):
    """Use config dictionary to generate all important attributes for head.

    Args:
      inputs: dictionary of the shape of input args as a dictionary of lists.
    """
    # define the key order
    keys = [int(key) for key in inputs.keys()]
    self._min_level = min(keys)
    self._max_level = max(keys)
    self._min_depth = inputs[str(self._min_level)][-1]
    self._depths = self.get_raw_depths(self._min_depth, inputs)

    # directly connect to an input path and process it
    self.preprocessors = dict()
    # resample an input and merge it with the output of another path
    # in order to aggregate backbone outputs
    self.resamples = dict()

    # FPN will reverse the key process order for the backbone, so we need to
    # adjust the order that objects are created and processed to adjust for
    # this. Not using an FPN will directly connect the decoder to the backbone,
    # therefore the object creation order needs to be done from the largest
    # to smallest level.
    if self._fpn_input:
      # process order {... 3, 4, 5}
      self._iterator = range(self._min_level, self._max_level + 1)
      self._check = lambda x: x < self._max_level
      self._key_shift = lambda x: x + 1
      self._input = self._min_level
      downsample = True
      upsample = False
    else:
      # process order {5, 4, 3, ...}
      self._iterator = list(
          reversed(range(self._min_level, self._max_level + 1)))
      self._check = lambda x: x > self._min_level
      self._key_shift = lambda x: x - 1
      self._input = self._max_level
      downsample = False
      upsample = True

    if self._csp_stack == 0:
      proc_filters = lambda x: x
      resample_filters = lambda x: x // 2
    else:
      proc_filters = lambda x: x * 2
      resample_filters = lambda x: x

    for level, depth in zip(self._iterator, self._depths):
      if level == self._input:
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=proc_filters(depth),
            repetitions=self._max_level_process_len,
            insert_spp=self._embed_spp,
            block_invert=False,
            insert_sam=self._use_spatial_attention,
            csp_stack=self._csp_stack,
            **self._base_config)
      else:
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=resample_filters(depth),
            upsample=upsample,
            downsample=downsample,
            inverted=False,
            drop_final=self._csp_stack == 0,
            **self._base_config)
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=proc_filters(depth),
            repetitions=self._path_process_len,
            insert_spp=False,
            insert_sam=self._use_spatial_attention,
            csp_stack=self._csp_stack,
            **self._base_config)

  def get_raw_depths(self, minimum_depth, inputs):
    """Calculates the unscaled depths of the FPN branches.

    Args:
      minimum_depth: `int` depth of the smallest branch of the FPN.
      inputs: `dict[str, tf.InputSpec]` of the shape of input args as a
        dictionary of lists.

    Returns:
      The unscaled depths of the FPN branches.
    """
    depths = []
    if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1:
      for i in range(self._min_level, self._max_level + 1):
        depths.append(inputs[str(i)][-1] * 2)
    else:
      for _ in range(self._min_level, self._max_level + 1):
        depths.append(minimum_depth)
        minimum_depth *= 2
    if self._fpn_input:
      return depths
    return list(reversed(depths))

  def call(self, inputs):
    outputs = dict()
    layer_in = inputs[str(self._input)]

    for level in self._iterator:
      x_route, x = self.preprocessors[str(level)](layer_in)
      outputs[str(level)] = x
      if self._check(level):
        x_next = inputs[str(self._key_shift(level))]
        _, layer_in = self.resamples[str(
            self._key_shift(level))]([x_route, x_next])
    return outputs


@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloDecoder(tf.keras.Model):
  """Darknet Backbone Decoder."""

  def __init__(self,
               input_specs,
               use_fpn=False,
               use_spatial_attention=False,
               csp_stack=False,
               fpn_depth=4,
               fpn_filter_scale=1,
               path_process_len=6,
               max_level_process_len=None,
               embed_spp=False,
               activation='leaky',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
    """Yolo Decoder initialization function.

    A unified model that ties all decoder components into a conditionally built
    YOLO decoder.

    Args:
      input_specs: `dict[str, tf.InputSpec]`: input specs of each of the inputs
        to the heads.
      use_fpn: `bool`, use the FPN found in the YoloV4 model.
      use_spatial_attention: `bool`, use the spatial attention module.
      csp_stack: `bool`, CSPize the FPN.
      fpn_depth: `int`, number of layers to use in each FPN path if you choose
        to use an FPN.
      fpn_filter_scale: `int`, scaling factor for the FPN filters.
      path_process_len: `int`, number of layers to use in each Decoder path.
      max_level_process_len: `int`, number of layers to use in the largest
        processing path, or the backbone's largest output if it is different.
      embed_spp: `bool`, use the SPP found in the YoloV3 and V4 model.
      activation: `str`, the activation function to use, typically leaky or
        mish.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float`, normalization momentum for the moving average.
      norm_epsilon: `float`, small float added to variance to avoid dividing by
        zero.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      **kwargs: keyword arguments to be passed.
    """
    self._input_specs = input_specs
    self._use_fpn = use_fpn
    self._fpn_depth = fpn_depth
    self._path_process_len = path_process_len
    self._max_level_process_len = max_level_process_len
    self._embed_spp = embed_spp

    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    self._base_config = dict(
        use_spatial_attention=use_spatial_attention,
        csp_stack=csp_stack,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        fpn_filter_scale=fpn_filter_scale,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)

    self._decoder_config = dict(
        path_process_len=self._path_process_len,
        max_level_process_len=self._max_level_process_len,
        embed_spp=self._embed_spp,
        fpn_input=self._use_fpn,
        **self._base_config)

    inputs = {
        key: tf.keras.layers.Input(shape=value[1:])
        for key, value in input_specs.items()
    }
    if self._use_fpn:
      inter_outs = YoloFPN(
          fpn_depth=self._fpn_depth, **self._base_config)(inputs)
      outputs = YoloPAN(**self._decoder_config)(inter_outs)
    else:
      inter_outs = None
      outputs = YoloPAN(**self._decoder_config)(inputs)

    self._output_specs = {key: value.shape for key, value in outputs.items()}
    super().__init__(inputs=inputs, outputs=outputs, name='YoloDecoder')

  @property
  def use_fpn(self):
    return self._use_fpn

  @property
  def output_specs(self):
    return self._output_specs

  def get_config(self):
    config = dict(
        input_specs=self._input_specs,
        use_fpn=self._use_fpn,
        fpn_depth=self._fpn_depth,
        **self._decoder_config)
    return config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
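A usage sketch of the new decoder, assuming the Model Garden repo is importable (on PYTHONPATH); the `input_specs` shapes below are hypothetical backbone output shapes for a 416x416 input, not values from this commit:

# Hypothetical usage sketch of YoloDecoder (requires the Model Garden
# repo; shapes are illustrative per-level backbone outputs for 416x416).
from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder

input_specs = {
    '3': [None, 52, 52, 256],
    '4': [None, 26, 26, 512],
    '5': [None, 13, 13, 1024],
}
decoder = yolo_decoder.YoloDecoder(
    input_specs, use_fpn=True, fpn_depth=4, path_process_len=6)
print(decoder.output_specs)  # dict of per-level output shapes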