Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
657dcda5
Commit
657dcda5
authored
Jul 01, 2020
by
Kaushik Shivakumar
Browse files
pull latest
parents
26e24e21
e6017471
Changes
114
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1058 additions
and
148 deletions
+1058
-148
official/nlp/tasks/sentence_prediction.py
official/nlp/tasks/sentence_prediction.py
+25
-21
official/nlp/tasks/tagging.py
official/nlp/tasks/tagging.py
+77
-25
official/nlp/tasks/tagging_test.py
official/nlp/tasks/tagging_test.py
+24
-3
official/nlp/transformer/optimizer.py
official/nlp/transformer/optimizer.py
+0
-71
official/nlp/transformer/transformer_main.py
official/nlp/transformer/transformer_main.py
+3
-8
official/nlp/transformer/translate.py
official/nlp/transformer/translate.py
+1
-1
official/pip_package/setup.py
official/pip_package/setup.py
+3
-0
official/requirements.txt
official/requirements.txt
+5
-2
official/vision/detection/README.md
official/vision/detection/README.md
+34
-0
official/vision/detection/configs/base_config.py
official/vision/detection/configs/base_config.py
+5
-0
official/vision/detection/modeling/architecture/factory.py
official/vision/detection/modeling/architecture/factory.py
+4
-0
official/vision/detection/modeling/architecture/nn_blocks.py
official/vision/detection/modeling/architecture/nn_blocks.py
+318
-0
official/vision/detection/modeling/architecture/spinenet.py
official/vision/detection/modeling/architecture/spinenet.py
+506
-0
official/vision/image_classification/classifier_trainer.py
official/vision/image_classification/classifier_trainer.py
+2
-1
official/vision/image_classification/optimizer_factory.py
official/vision/image_classification/optimizer_factory.py
+10
-3
official/vision/image_classification/optimizer_factory_test.py
...ial/vision/image_classification/optimizer_factory_test.py
+11
-3
research/delf/delf/protos/delf_config.proto
research/delf/delf/protos/delf_config.proto
+3
-0
research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py
...f/delf/python/detect_to_retrieve/cluster_delf_features.py
+1
-1
research/delf/delf/python/examples/extractor.py
research/delf/delf/python/examples/extractor.py
+24
-4
research/delf/delf/python/feature_aggregation_extractor.py
research/delf/delf/python/feature_aggregation_extractor.py
+2
-5
No files found.
official/nlp/tasks/sentence_prediction.py
View file @
657dcda5
...
...
@@ -26,7 +26,6 @@ from official.core import base_task
from
official.modeling.hyperparams
import
config_definitions
as
cfg
from
official.nlp.configs
import
bert
from
official.nlp.data
import
sentence_prediction_dataloader
from
official.nlp.modeling
import
losses
as
loss_lib
from
official.nlp.tasks
import
utils
...
...
@@ -36,6 +35,7 @@ class SentencePredictionConfig(cfg.TaskConfig):
# At most one of `init_checkpoint` and `hub_module_url` can
# be specified.
init_checkpoint
:
str
=
''
init_cls_pooler
:
bool
=
False
hub_module_url
:
str
=
''
metric_type
:
str
=
'accuracy'
model
:
bert
.
BertPretrainerConfig
=
bert
.
BertPretrainerConfig
(
...
...
@@ -55,11 +55,11 @@ class SentencePredictionConfig(cfg.TaskConfig):
class
SentencePredictionTask
(
base_task
.
Task
):
"""Task object for sentence_prediction."""
def
__init__
(
self
,
params
=
cfg
.
TaskConfig
):
super
(
SentencePredictionTask
,
self
).
__init__
(
params
)
def
__init__
(
self
,
params
=
cfg
.
TaskConfig
,
logging_dir
=
None
):
super
(
SentencePredictionTask
,
self
).
__init__
(
params
,
logging_dir
)
if
params
.
hub_module_url
and
params
.
init_checkpoint
:
raise
ValueError
(
'At most one of `hub_module_url` and '
'`
pretra
in_checkpoint
_dir
` can be specified.'
)
'`in
it
_checkpoint` can be specified.'
)
if
params
.
hub_module_url
:
self
.
_hub_module
=
hub
.
load
(
params
.
hub_module_url
)
else
:
...
...
@@ -75,10 +75,10 @@ class SentencePredictionTask(base_task.Task):
return
bert
.
instantiate_bertpretrainer_from_cfg
(
self
.
task_config
.
model
)
def
build_losses
(
self
,
labels
,
model_outputs
,
aux_losses
=
None
)
->
tf
.
Tensor
:
loss
=
loss_lib
.
weighted_
sparse_categorical_crossentropy
_loss
(
labels
=
labels
,
predictions
=
tf
.
nn
.
log_softmax
(
tf
.
cast
(
model_outputs
[
'sentence_prediction'
],
tf
.
float32
),
axis
=-
1
)
)
loss
=
tf
.
keras
.
losses
.
sparse_categorical_crossentropy
(
labels
,
tf
.
cast
(
model_outputs
[
'sentence_prediction'
],
tf
.
float32
),
from_logits
=
True
)
if
aux_losses
:
loss
+=
tf
.
add_n
(
aux_losses
)
...
...
@@ -94,7 +94,7 @@ class SentencePredictionTask(base_task.Task):
input_word_ids
=
dummy_ids
,
input_mask
=
dummy_ids
,
input_type_ids
=
dummy_ids
)
y
=
tf
.
one
s
((
1
,
1
),
dtype
=
tf
.
int32
)
y
=
tf
.
zero
s
((
1
,
1
),
dtype
=
tf
.
int32
)
return
(
x
,
y
)
dataset
=
tf
.
data
.
Dataset
.
range
(
1
)
...
...
@@ -126,25 +126,26 @@ class SentencePredictionTask(base_task.Task):
outputs
=
self
.
inference_step
(
features
,
model
)
loss
=
self
.
build_losses
(
labels
=
labels
,
model_outputs
=
outputs
,
aux_losses
=
model
.
losses
)
logs
=
{
self
.
loss
:
loss
}
if
self
.
metric_type
==
'matthews_corrcoef'
:
return
{
self
.
loss
:
loss
,
logs
.
update
({
'sentence_prediction'
:
tf
.
expand_dims
(
tf
.
math
.
argmax
(
outputs
[
'sentence_prediction'
],
axis
=
1
),
axis
=
0
),
'labels'
:
labels
,
}
}
)
if
self
.
metric_type
==
'pearson_spearman_corr'
:
return
{
self
.
loss
:
loss
,
logs
.
update
({
'sentence_prediction'
:
outputs
[
'sentence_prediction'
],
'labels'
:
labels
,
}
})
return
logs
def
aggregate_logs
(
self
,
state
=
None
,
step_outputs
=
None
):
if
self
.
metric_type
==
'accuracy'
:
return
None
if
state
is
None
:
state
=
{
'sentence_prediction'
:
[],
'labels'
:
[]}
state
[
'sentence_prediction'
].
append
(
...
...
@@ -178,13 +179,16 @@ class SentencePredictionTask(base_task.Task):
return
pretrain2finetune_mapping
=
{
'encoder'
:
model
.
checkpoint_items
[
'encoder'
],
'next_sentence.pooler_dense'
:
model
.
checkpoint_items
[
'sentence_prediction.pooler_dense'
],
'encoder'
:
model
.
checkpoint_items
[
'encoder'
],
}
# TODO(b/160251903): Investigate why no pooler dense improves finetuning
# accuracies.
if
self
.
task_config
.
init_cls_pooler
:
pretrain2finetune_mapping
[
'next_sentence.pooler_dense'
]
=
model
.
checkpoint_items
[
'sentence_prediction.pooler_dense'
]
ckpt
=
tf
.
train
.
Checkpoint
(
**
pretrain2finetune_mapping
)
status
=
ckpt
.
re
store
(
ckpt_dir_or_file
)
status
=
ckpt
.
re
ad
(
ckpt_dir_or_file
)
status
.
expect_partial
().
assert_existing_objects_matched
()
logging
.
info
(
'finished loading pretrained checkpoint from %s'
,
ckpt_dir_or_file
)
official/nlp/tasks/tagging.py
View file @
657dcda5
...
...
@@ -15,7 +15,12 @@
# ==============================================================================
"""Tagging (e.g., NER/POS) task."""
import
logging
from
typing
import
List
,
Optional
import
dataclasses
from
seqeval
import
metrics
as
seqeval_metrics
import
tensorflow
as
tf
import
tensorflow_hub
as
hub
...
...
@@ -36,12 +41,12 @@ class TaggingConfig(cfg.TaskConfig):
model
:
encoders
.
TransformerEncoderConfig
=
(
encoders
.
TransformerEncoderConfig
())
# The
number of
real la
bels. Note that a word may be tokenized into
#
multiple word_pieces tokens, and we asssume the real label id (non-negative)
#
is
ass
igned to the first token of the word, and a negative label id is
#
assigned to the remaining tokens. The negative label id will not contribute
# to loss and metrics.
num_
class
es
:
int
=
0
# The real
c
la
ss names, the order of which should match real label id.
#
Note that a word may be tokenized into multiple word_pieces tokens, and
#
we
ass
sume the real label id (non-negative) is assigned to the first token
#
of the word, and a negative label id is assigned to the remaining tokens.
#
The negative label id will not contribute
to loss and metrics.
class
_names
:
Optional
[
List
[
str
]]
=
None
train_data
:
cfg
.
DataConfig
=
cfg
.
DataConfig
()
validation_data
:
cfg
.
DataConfig
=
cfg
.
DataConfig
()
...
...
@@ -70,13 +75,13 @@ def _masked_labels_and_weights(y_true):
class
TaggingTask
(
base_task
.
Task
):
"""Task object for tagging (e.g., NER or POS)."""
def
__init__
(
self
,
params
=
cfg
.
TaskConfig
):
super
(
TaggingTask
,
self
).
__init__
(
params
)
def
__init__
(
self
,
params
=
cfg
.
TaskConfig
,
logging_dir
=
None
):
super
(
TaggingTask
,
self
).
__init__
(
params
,
logging_dir
)
if
params
.
hub_module_url
and
params
.
init_checkpoint
:
raise
ValueError
(
'At most one of `hub_module_url` and '
'`init_checkpoint` can be specified.'
)
if
params
.
num_
class
es
==
0
:
raise
ValueError
(
'TaggingConfig.
num_
classes cannot be
0
.'
)
if
not
params
.
class
_names
:
raise
ValueError
(
'TaggingConfig.class
_nam
es cannot be
empty
.'
)
if
params
.
hub_module_url
:
self
.
_hub_module
=
hub
.
load
(
params
.
hub_module_url
)
...
...
@@ -92,7 +97,7 @@ class TaggingTask(base_task.Task):
return
models
.
BertTokenClassifier
(
network
=
encoder_network
,
num_classes
=
self
.
task_config
.
num_
classes
,
num_classes
=
len
(
self
.
task_config
.
class
_nam
es
)
,
initializer
=
tf
.
keras
.
initializers
.
TruncatedNormal
(
stddev
=
self
.
task_config
.
model
.
initializer_range
),
dropout_rate
=
self
.
task_config
.
model
.
dropout_rate
,
...
...
@@ -123,7 +128,7 @@ class TaggingTask(base_task.Task):
y
=
tf
.
random
.
uniform
(
shape
=
(
1
,
params
.
seq_length
),
minval
=-
1
,
maxval
=
self
.
task_config
.
num_
classes
,
maxval
=
len
(
self
.
task_config
.
class
_nam
es
)
,
dtype
=
tf
.
dtypes
.
int32
)
return
(
x
,
y
)
...
...
@@ -136,19 +141,66 @@ class TaggingTask(base_task.Task):
dataset
=
tagging_data_loader
.
TaggingDataLoader
(
params
).
load
(
input_context
)
return
dataset
def
build_metrics
(
self
,
training
=
None
):
del
training
# TODO(chendouble): evaluate using seqeval's f1/precision/recall.
return
[
tf
.
keras
.
metrics
.
SparseCategoricalAccuracy
(
name
=
'accuracy'
)]
def
process_metrics
(
self
,
metrics
,
labels
,
model_outputs
):
masked_labels
,
masked_weights
=
_masked_labels_and_weights
(
labels
)
for
metric
in
metrics
:
metric
.
update_state
(
masked_labels
,
model_outputs
,
masked_weights
)
def
process_compiled_metrics
(
self
,
compiled_metrics
,
labels
,
model_outputs
):
masked_labels
,
masked_weights
=
_masked_labels_and_weights
(
labels
)
compiled_metrics
.
update_state
(
masked_labels
,
model_outputs
,
masked_weights
)
def
validation_step
(
self
,
inputs
,
model
:
tf
.
keras
.
Model
,
metrics
=
None
):
"""Validatation step.
Args:
inputs: a dictionary of input tensors.
model: the keras.Model.
metrics: a nested structure of metrics objects.
Returns:
A dictionary of logs.
"""
features
,
labels
=
inputs
outputs
=
self
.
inference_step
(
features
,
model
)
loss
=
self
.
build_losses
(
labels
=
labels
,
model_outputs
=
outputs
)
# Negative label ids are padding labels which should be ignored.
real_label_index
=
tf
.
where
(
tf
.
greater_equal
(
labels
,
0
))
predict_ids
=
tf
.
math
.
argmax
(
outputs
,
axis
=-
1
)
predict_ids
=
tf
.
gather_nd
(
predict_ids
,
real_label_index
)
label_ids
=
tf
.
gather_nd
(
labels
,
real_label_index
)
return
{
self
.
loss
:
loss
,
'predict_ids'
:
predict_ids
,
'label_ids'
:
label_ids
,
}
def
aggregate_logs
(
self
,
state
=
None
,
step_outputs
=
None
):
"""Aggregates over logs returned from a validation step."""
if
state
is
None
:
state
=
{
'predict_class'
:
[],
'label_class'
:
[]}
def
id_to_class_name
(
batched_ids
):
class_names
=
[]
for
per_example_ids
in
batched_ids
:
class_names
.
append
([])
for
per_token_id
in
per_example_ids
.
numpy
().
tolist
():
class_names
[
-
1
].
append
(
self
.
task_config
.
class_names
[
per_token_id
])
return
class_names
# Convert id to class names, because `seqeval_metrics` relies on the class
# name to decide IOB tags.
state
[
'predict_class'
].
extend
(
id_to_class_name
(
step_outputs
[
'predict_ids'
]))
state
[
'label_class'
].
extend
(
id_to_class_name
(
step_outputs
[
'label_ids'
]))
return
state
def
reduce_aggregated_logs
(
self
,
aggregated_logs
):
"""Reduces aggregated logs over validation steps."""
label_class
=
aggregated_logs
[
'label_class'
]
predict_class
=
aggregated_logs
[
'predict_class'
]
return
{
'f1'
:
seqeval_metrics
.
f1_score
(
label_class
,
predict_class
),
'precision'
:
seqeval_metrics
.
precision_score
(
label_class
,
predict_class
),
'recall'
:
seqeval_metrics
.
recall_score
(
label_class
,
predict_class
),
'accuracy'
:
seqeval_metrics
.
accuracy_score
(
label_class
,
predict_class
),
}
def
initialize
(
self
,
model
):
"""Load a pretrained checkpoint (if exists) and then train from iter 0."""
...
...
official/nlp/tasks/tagging_test.py
View file @
657dcda5
...
...
@@ -58,7 +58,7 @@ class TaggingTest(tf.test.TestCase):
init_checkpoint
=
saved_path
,
model
=
self
.
_encoder_config
,
train_data
=
self
.
_train_data_config
,
num_
class
es
=
3
)
class
_names
=
[
"O"
,
"B-PER"
,
"I-PER"
]
)
task
=
tagging
.
TaggingTask
(
config
)
model
=
task
.
build_model
()
metrics
=
task
.
build_metrics
()
...
...
@@ -74,7 +74,7 @@ class TaggingTest(tf.test.TestCase):
config
=
tagging
.
TaggingConfig
(
model
=
self
.
_encoder_config
,
train_data
=
self
.
_train_data_config
,
num_
class
es
=
3
)
class
_names
=
[
"O"
,
"B-PER"
,
"I-PER"
]
)
task
=
tagging
.
TaggingTask
(
config
)
model
=
task
.
build_model
()
...
...
@@ -116,10 +116,31 @@ class TaggingTest(tf.test.TestCase):
config
=
tagging
.
TaggingConfig
(
hub_module_url
=
hub_module_url
,
model
=
self
.
_encoder_config
,
num_
class
es
=
4
,
class
_names
=
[
"O"
,
"B-PER"
,
"I-PER"
]
,
train_data
=
self
.
_train_data_config
)
self
.
_run_task
(
config
)
def
test_seqeval_metrics
(
self
):
config
=
tagging
.
TaggingConfig
(
model
=
self
.
_encoder_config
,
train_data
=
self
.
_train_data_config
,
class_names
=
[
"O"
,
"B-PER"
,
"I-PER"
])
task
=
tagging
.
TaggingTask
(
config
)
model
=
task
.
build_model
()
dataset
=
task
.
build_inputs
(
config
.
train_data
)
iterator
=
iter
(
dataset
)
strategy
=
tf
.
distribute
.
get_strategy
()
distributed_outputs
=
strategy
.
run
(
functools
.
partial
(
task
.
validation_step
,
model
=
model
),
args
=
(
next
(
iterator
),))
outputs
=
tf
.
nest
.
map_structure
(
strategy
.
experimental_local_results
,
distributed_outputs
)
aggregated
=
task
.
aggregate_logs
(
step_outputs
=
outputs
)
aggregated
=
task
.
aggregate_logs
(
state
=
aggregated
,
step_outputs
=
outputs
)
self
.
assertCountEqual
({
"f1"
,
"precision"
,
"recall"
,
"accuracy"
},
task
.
reduce_aggregated_logs
(
aggregated
).
keys
())
if
__name__
==
"__main__"
:
tf
.
test
.
main
()
official/nlp/transformer/optimizer.py
View file @
657dcda5
...
...
@@ -18,9 +18,7 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
tensorflow
as
tf
K
=
tf
.
keras
.
backend
class
LearningRateSchedule
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
...
...
@@ -66,72 +64,3 @@ class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
'hidden_size'
:
self
.
hidden_size
,
'warmup_steps'
:
self
.
warmup_steps
,
}
class
LearningRateFn
(
object
):
"""Creates learning rate function."""
def
__init__
(
self
,
learning_rate
,
hidden_size
,
warmup_steps
):
self
.
learning_rate
=
learning_rate
self
.
hidden_size
=
hidden_size
self
.
warmup_steps
=
float
(
warmup_steps
)
def
__call__
(
self
,
global_step
):
"""Calculate learning rate with linear warmup and rsqrt decay."""
step
=
float
(
global_step
)
learning_rate
=
self
.
learning_rate
learning_rate
*=
(
self
.
hidden_size
**
-
0.5
)
# Apply linear warmup
learning_rate
*=
np
.
minimum
(
1.0
,
step
/
self
.
warmup_steps
)
# Apply rsqrt decay
learning_rate
/=
np
.
sqrt
(
np
.
maximum
(
step
,
self
.
warmup_steps
))
return
learning_rate
class
LearningRateScheduler
(
tf
.
keras
.
callbacks
.
Callback
):
"""Keras callback to schedule learning rate.
TODO(tianlin): Refactor this scheduler and LearningRateBatchScheduler in
official/resnet/keras/keras_common.py.
"""
def
__init__
(
self
,
schedule
,
init_steps
=
None
,
verbose
=
False
):
super
(
LearningRateScheduler
,
self
).
__init__
()
self
.
schedule
=
schedule
self
.
verbose
=
verbose
if
init_steps
is
None
:
init_steps
=
0.0
self
.
steps
=
float
(
init_steps
)
# Total steps during training.
def
on_epoch_begin
(
self
,
epoch
,
logs
=
None
):
if
not
hasattr
(
self
.
model
.
optimizer
,
'lr'
):
raise
ValueError
(
'Optimizer must have a "lr" attribute.'
)
if
not
hasattr
(
self
.
model
.
optimizer
,
'iterations'
):
raise
ValueError
(
'Optimizer must have a "iterations" attribute.'
)
def
on_train_batch_begin
(
self
,
batch
,
logs
=
None
):
"""Adjusts learning rate for each train batch."""
if
self
.
verbose
>
0
:
iterations
=
K
.
get_value
(
self
.
model
.
optimizer
.
iterations
)
print
(
'Original iteration %d'
%
iterations
)
self
.
steps
+=
1.0
try
:
# new API
lr
=
float
(
K
.
get_value
(
self
.
model
.
optimizer
.
lr
))
lr
=
self
.
schedule
(
self
.
steps
,
lr
)
except
TypeError
:
# Support for old API for backward compatibility
lr
=
self
.
schedule
(
self
.
steps
)
if
not
isinstance
(
lr
,
(
float
,
np
.
float32
,
np
.
float64
)):
raise
ValueError
(
'The output of the "schedule" function '
'should be float.'
)
K
.
set_value
(
self
.
model
.
optimizer
.
lr
,
lr
)
K
.
set_value
(
self
.
model
.
optimizer
.
iterations
,
self
.
steps
)
if
self
.
verbose
>
0
:
print
(
'Batch %05d Step %05d: LearningRateScheduler setting learning '
'rate to %s.'
%
(
batch
+
1
,
self
.
steps
,
lr
))
def
on_epoch_end
(
self
,
epoch
,
logs
=
None
):
logs
=
logs
or
{}
logs
[
'lr'
]
=
K
.
get_value
(
self
.
model
.
optimizer
.
lr
)
logs
[
'steps'
]
=
self
.
steps
official/nlp/transformer/transformer_main.py
View file @
657dcda5
...
...
@@ -241,7 +241,7 @@ class TransformerTask(object):
if
params
[
"use_ctl"
]:
train_ds_iterator
=
iter
(
train_ds
)
callbacks
=
self
.
_create_callbacks
(
flags_obj
.
model_dir
,
0
,
params
)
callbacks
=
self
.
_create_callbacks
(
flags_obj
.
model_dir
,
params
)
# Only TimeHistory callback is supported for CTL
if
params
[
"use_ctl"
]:
...
...
@@ -408,14 +408,9 @@ class TransformerTask(object):
for
i
in
range
(
length
):
translate
.
translate_from_input
(
val_outputs
[
i
],
subtokenizer
)
def
_create_callbacks
(
self
,
cur_log_dir
,
init_steps
,
params
):
def
_create_callbacks
(
self
,
cur_log_dir
,
params
):
"""Creates a list of callbacks."""
sfunc
=
optimizer
.
LearningRateFn
(
params
[
"learning_rate"
],
params
[
"hidden_size"
],
params
[
"learning_rate_warmup_steps"
])
scheduler_callback
=
optimizer
.
LearningRateScheduler
(
sfunc
,
init_steps
)
callbacks
=
misc
.
get_callbacks
()
callbacks
.
append
(
scheduler_callback
)
if
params
[
"enable_checkpointing"
]:
ckpt_full_path
=
os
.
path
.
join
(
cur_log_dir
,
"cp-{epoch:04d}.ckpt"
)
callbacks
.
append
(
...
...
@@ -445,7 +440,7 @@ class TransformerTask(object):
params
[
"learning_rate"
],
params
[
"hidden_size"
],
params
[
"learning_rate_warmup_steps"
])
opt
=
tf
.
keras
.
optimizers
.
Adam
(
lr_schedule
if
self
.
use_tpu
else
params
[
"learning_rate"
]
,
lr_schedule
,
params
[
"optimizer_adam_beta1"
],
params
[
"optimizer_adam_beta2"
],
epsilon
=
params
[
"optimizer_adam_epsilon"
])
...
...
official/nlp/transformer/translate.py
View file @
657dcda5
...
...
@@ -181,7 +181,7 @@ def translate_file(model,
raise
ValueError
(
"File output is a directory, will not save outputs to "
"file."
)
logging
.
info
(
"Writing to file %s"
,
output_file
)
with
tf
.
compat
.
v1
.
gfile
.
Open
(
output_file
,
"w"
)
as
f
:
with
tf
.
io
.
gfile
.
GFile
(
output_file
,
"w"
)
as
f
:
for
i
in
sorted_keys
:
f
.
write
(
"%s
\n
"
%
translations
[
i
])
...
...
official/pip_package/setup.py
View file @
657dcda5
...
...
@@ -45,6 +45,9 @@ def _get_requirements():
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'../requirements.txt'
),
'r'
)
as
f
:
for
line
in
f
:
package_name
=
line
.
strip
()
# Skip empty line or comments starting with "#".
if
not
package_name
or
package_name
[
0
]
==
'#'
:
continue
if
package_name
.
startswith
(
'-e '
):
dependency_links_tmp
.
append
(
package_name
[
3
:].
strip
())
else
:
...
...
official/requirements.txt
View file @
657dcda5
...
...
@@ -16,10 +16,13 @@ dataclasses
gin-config
tf_slim>=1.1.0
typing
sentencepiece
Cython
matplotlib
opencv-python-headless
pyyaml
# CV related dependencies
opencv-python-headless
Pillow
-e git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI
# NLP related dependencies
seqeval
sentencepiece
official/vision/detection/README.md
View file @
657dcda5
...
...
@@ -48,6 +48,22 @@ so the checkpoints are not compatible.
We will unify the implementation soon.
### Train a SpineNet-49 based RetinaNet.
```
bash
TPU_NAME
=
"<your GCP TPU name>"
MODEL_DIR
=
"<path to the directory to store model files>"
TRAIN_FILE_PATTERN
=
"<path to the TFRecord training data>"
EVAL_FILE_PATTERN
=
"<path to the TFRecord validation data>"
VAL_JSON_FILE
=
"<path to the validation annotation JSON file>"
python3 ~/models/official/vision/detection/main.py
\
--strategy_type
=
tpu
\
--tpu
=
"
${
TPU_NAME
?
}
"
\
--model_dir
=
"
${
MODEL_DIR
?
}
"
\
--mode
=
train
\
--params_override
=
"{ type: retinanet, architecture: {backbone: spinenet, multilevel_features: identity}, spinenet: {model_id: 49}, train_file_pattern:
${
TRAIN_FILE_PATTERN
?
}
}, eval: { val_json_file:
${
VAL_JSON_FILE
?
}
, eval_file_pattern:
${
EVAL_FILE_PATTERN
?
}
} }"
```
### Train a custom RetinaNet using the config file.
...
...
@@ -163,6 +179,24 @@ so the checkpoints are not compatible.
We will unify the implementation soon.
### Train a SpineNet-49 based Mask R-CNN.
```
bash
TPU_NAME
=
"<your GCP TPU name>"
MODEL_DIR
=
"<path to the directory to store model files>"
TRAIN_FILE_PATTERN
=
"<path to the TFRecord training data>"
EVAL_FILE_PATTERN
=
"<path to the TFRecord validation data>"
VAL_JSON_FILE
=
"<path to the validation annotation JSON file>"
python3 ~/models/official/vision/detection/main.py
\
--strategy_type
=
tpu
\
--tpu
=
"
${
TPU_NAME
?
}
"
\
--model_dir
=
"
${
MODEL_DIR
?
}
"
\
--mode
=
train
\
--model
=
mask_rcnn
\
--params_override
=
"{architecture: {backbone: spinenet, multilevel_features: identity}, spinenet: {model_id: 49}, train_file_pattern:
${
TRAIN_FILE_PATTERN
?
}
}, eval: { val_json_file:
${
VAL_JSON_FILE
?
}
, eval_file_pattern:
${
EVAL_FILE_PATTERN
?
}
} }"
```
### Train a custom Mask R-CNN using the config file.
First, create a YAML config file, e.g.
*my_maskrcnn.yaml*
.
...
...
official/vision/detection/configs/base_config.py
View file @
657dcda5
...
...
@@ -17,10 +17,12 @@
BACKBONES
=
[
'resnet'
,
'spinenet'
,
]
MULTILEVEL_FEATURES
=
[
'fpn'
,
'identity'
,
]
# pylint: disable=line-too-long
...
...
@@ -118,6 +120,9 @@ BASE_CFG = {
'resnet'
:
{
'resnet_depth'
:
50
,
},
'spinenet'
:
{
'model_id'
:
'49'
,
},
'fpn'
:
{
'fpn_feat_dims'
:
256
,
'use_separable_conv'
:
False
,
...
...
official/vision/detection/modeling/architecture/factory.py
View file @
657dcda5
...
...
@@ -23,6 +23,7 @@ from official.vision.detection.modeling.architecture import heads
from
official.vision.detection.modeling.architecture
import
identity
from
official.vision.detection.modeling.architecture
import
nn_ops
from
official.vision.detection.modeling.architecture
import
resnet
from
official.vision.detection.modeling.architecture
import
spinenet
def
norm_activation_generator
(
params
):
...
...
@@ -42,6 +43,9 @@ def backbone_generator(params):
activation
=
params
.
norm_activation
.
activation
,
norm_activation
=
norm_activation_generator
(
params
.
norm_activation
))
elif
params
.
architecture
.
backbone
==
'spinenet'
:
spinenet_params
=
params
.
spinenet
backbone_fn
=
spinenet
.
SpineNetBuilder
(
model_id
=
spinenet_params
.
model_id
)
else
:
raise
ValueError
(
'Backbone model `{}` is not supported.'
.
format
(
params
.
architecture
.
backbone
))
...
...
official/vision/detection/modeling/architecture/nn_blocks.py
0 → 100644
View file @
657dcda5
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common building blocks for neural networks."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
ResidualBlock
(
tf
.
keras
.
layers
.
Layer
):
"""A residual block."""
def
__init__
(
self
,
filters
,
strides
,
use_projection
=
False
,
kernel_initializer
=
'VarianceScaling'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
activation
=
'relu'
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
**
kwargs
):
"""A residual block with BN after convolutions.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super
(
ResidualBlock
,
self
).
__init__
(
**
kwargs
)
self
.
_filters
=
filters
self
.
_strides
=
strides
self
.
_use_projection
=
use_projection
self
.
_use_sync_bn
=
use_sync_bn
self
.
_activation
=
activation
self
.
_kernel_initializer
=
kernel_initializer
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_bias_regularizer
=
bias_regularizer
if
use_sync_bn
:
self
.
_norm
=
tf
.
keras
.
layers
.
experimental
.
SyncBatchNormalization
else
:
self
.
_norm
=
tf
.
keras
.
layers
.
BatchNormalization
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
:
self
.
_bn_axis
=
-
1
else
:
self
.
_bn_axis
=
1
self
.
_activation_fn
=
tf_utils
.
get_activation
(
activation
)
def
build
(
self
,
input_shape
):
if
self
.
_use_projection
:
self
.
_shortcut
=
tf
.
keras
.
layers
.
Conv2D
(
filters
=
self
.
_filters
,
kernel_size
=
1
,
strides
=
self
.
_strides
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)
self
.
_norm0
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)
self
.
_conv1
=
tf
.
keras
.
layers
.
Conv2D
(
filters
=
self
.
_filters
,
kernel_size
=
3
,
strides
=
self
.
_strides
,
padding
=
'same'
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)
self
.
_norm1
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)
self
.
_conv2
=
tf
.
keras
.
layers
.
Conv2D
(
filters
=
self
.
_filters
,
kernel_size
=
3
,
strides
=
1
,
padding
=
'same'
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)
self
.
_norm2
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)
super
(
ResidualBlock
,
self
).
build
(
input_shape
)
def
get_config
(
self
):
config
=
{
'filters'
:
self
.
_filters
,
'strides'
:
self
.
_strides
,
'use_projection'
:
self
.
_use_projection
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'activation'
:
self
.
_activation
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
}
base_config
=
super
(
ResidualBlock
,
self
).
get_config
()
return
dict
(
list
(
base_config
.
items
())
+
list
(
config
.
items
()))
def
call
(
self
,
inputs
):
shortcut
=
inputs
if
self
.
_use_projection
:
shortcut
=
self
.
_shortcut
(
shortcut
)
shortcut
=
self
.
_norm0
(
shortcut
)
x
=
self
.
_conv1
(
inputs
)
x
=
self
.
_norm1
(
x
)
x
=
self
.
_activation_fn
(
x
)
x
=
self
.
_conv2
(
x
)
x
=
self
.
_norm2
(
x
)
return
self
.
_activation_fn
(
x
+
shortcut
)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock(tf.keras.layers.Layer):
  """A standard bottleneck block (1x1 -> 3x3 -> 1x1 convolutions)."""

  def __init__(self,
               filters,
               strides,
               use_projection=False,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """A standard bottleneck block with BN after convolutions.

    Args:
      filters: `int` number of filters for the first two convolutions. Note that
        the third and final convolution will use 4 times as many filters.
      strides: `int` block stride. If greater than 1, this block will ultimately
        downsample the input.
      use_projection: `bool` for whether this block should use a projection
        shortcut (versus the default identity shortcut). This is usually `True`
        for the first block of a block group, which may change the number of
        filters and the resolution.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    super(BottleneckBlock, self).__init__(**kwargs)

    self._filters = filters
    self._strides = strides
    self._use_projection = use_projection
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    # Pick the normalization class; the sync variant aggregates batch
    # statistics across replicas.
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    # Normalize over the channel axis, whose position depends on data format.
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    """Creates the block's child layers (convolutions and normalizations)."""
    if self._use_projection:
      # 1x1 projection so the shortcut matches the main branch's output
      # (4 * filters channels, possibly strided).
      self._shortcut = tf.keras.layers.Conv2D(
          filters=self._filters * 4,
          kernel_size=1,
          strides=self._strides,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
      self._norm0 = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)

    # 1x1 conv that reduces the channel count to `filters`.
    self._conv1 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    # 3x3 conv; carries the block's stride.
    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    # Final 1x1 conv that expands back to 4 * filters channels.
    self._conv3 = tf.keras.layers.Conv2D(
        filters=self._filters * 4,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm3 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    super(BottleneckBlock, self).build(input_shape)

  def get_config(self):
    """Returns the layer configuration for serialization."""
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'use_projection': self._use_projection,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    base_config = super(BottleneckBlock, self).get_config()
    # This block's entries override the base layer's on key collisions.
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    """Applies the three conv/BN stages and the residual addition."""
    shortcut = inputs
    if self._use_projection:
      shortcut = self._shortcut(shortcut)
      shortcut = self._norm0(shortcut)

    x = self._conv1(inputs)
    x = self._norm1(x)
    x = self._activation_fn(x)

    x = self._conv2(x)
    x = self._norm2(x)
    x = self._activation_fn(x)

    x = self._conv3(x)
    x = self._norm3(x)

    # Activation after the element-wise sum, ResNet-style.
    return self._activation_fn(x + shortcut)
official/vision/detection/modeling/architecture/spinenet.py
0 → 100644
View file @
657dcda5
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of SpineNet model.
X. Du, T-Y. Lin, P. Jin, G. Ghiasi, M. Tan, Y. Cui, Q. V. Le, X. Song
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization
https://arxiv.org/abs/1912.05027
"""
import
math
from
absl
import
logging
import
tensorflow
as
tf
from
tensorflow.python.keras
import
backend
from
official.modeling
import
tf_utils
from
official.vision.detection.modeling.architecture
import
nn_blocks
layers = tf.keras.layers

# Base number of filters for blocks at each feature level (before the
# per-model `filter_size_scale` multiplier is applied).
FILTER_SIZE_MAP = {
    1: 32,
    2: 64,
    3: 128,
    4: 256,
    5: 256,
    6: 256,
    7: 256,
}

# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
#   (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
    (2, 'bottleneck', (0, 1), False),
    (4, 'residual', (0, 1), False),
    (3, 'bottleneck', (2, 3), False),
    (4, 'bottleneck', (2, 4), False),
    (6, 'residual', (3, 5), False),
    (4, 'bottleneck', (3, 5), False),
    (5, 'residual', (6, 7), False),
    (7, 'residual', (6, 8), False),
    (5, 'bottleneck', (8, 9), False),
    (5, 'bottleneck', (8, 10), False),
    (4, 'bottleneck', (5, 10), True),
    (3, 'bottleneck', (4, 10), True),
    (5, 'bottleneck', (7, 12), True),
    (7, 'bottleneck', (5, 14), True),
    (6, 'bottleneck', (12, 14), True),
]

# Per-variant scaling parameters, keyed by model id (see SpineNetBuilder).
SCALING_MAP = {
    '49S': {
        'endpoints_num_filters': 128,
        'filter_size_scale': 0.65,
        'resample_alpha': 0.5,
        'block_repeats': 1,
    },
    '49': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 0.5,
        'block_repeats': 1,
    },
    '96': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 0.5,
        'block_repeats': 2,
    },
    '143': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 1.0,
        'block_repeats': 3,
    },
    '190': {
        'endpoints_num_filters': 512,
        'filter_size_scale': 1.3,
        'resample_alpha': 1.0,
        'block_repeats': 4,
    },
}
class BlockSpec(object):
  """A container class that specifies the block configuration for SpineNet."""

  def __init__(self, level, block_fn, input_offsets, is_output):
    """Initializes a block specification.

    Args:
      level: `int` feature level of the block.
      block_fn: `str` name of the building block type, e.g. 'bottleneck' or
        'residual'.
      input_offsets: pair of `int` offsets identifying the block's two input
        (parent) blocks.
      is_output: `bool` whether the block's output is an output feature of the
        network.
    """
    self.level = level
    self.block_fn = block_fn
    self.input_offsets = input_offsets
    self.is_output = is_output

  def __repr__(self):
    # Makes logged block specs (e.g. from build_block_specs) readable.
    return ('BlockSpec(level={}, block_fn={!r}, input_offsets={}, '
            'is_output={})'.format(self.level, self.block_fn,
                                   self.input_offsets, self.is_output))
def build_block_specs(block_specs=None):
  """Builds the list of BlockSpec objects for SpineNet."""
  # Fall back to the NAS-discovered architecture when none is provided.
  specs = block_specs or SPINENET_BLOCK_SPECS
  logging.info('Building SpineNet block specs: %s', specs)
  return [BlockSpec(*spec) for spec in specs]
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNet(tf.keras.Model):
  """Class to build SpineNet models.

  The whole graph is assembled in `__init__` with the Keras functional API:
  a stem, the scale-permuted block network, and per-level endpoint convs.
  """

  def __init__(self,
               input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640,
                                                            3]),
               min_level=3,
               max_level=7,
               block_specs=build_block_specs(),
               endpoints_num_filters=256,
               resample_alpha=0.5,
               block_repeats=1,
               filter_size_scale=1.0,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """SpineNet model.

    Args:
      input_specs: `tf.keras.layers.InputSpec` describing the model input.
      min_level: `int` lowest output feature level.
      max_level: `int` highest output feature level.
      block_specs: list of `BlockSpec` defining the scale-permuted network.
      endpoints_num_filters: `int` filter count for the endpoint 1x1 convs.
      resample_alpha: `float` channel-scaling factor used when resampling
        features across levels.
      block_repeats: `int` number of repeated blocks per block group.
      filter_size_scale: `float` multiplier on the per-level filter sizes.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function ('relu' or 'swish').
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: additional keyword arguments (currently not forwarded to the
        base `tf.keras.Model`).

    Raises:
      ValueError: if `activation` is neither 'relu' nor 'swish'.
    """
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._endpoints_num_filters = endpoints_num_filters
    self._resample_alpha = resample_alpha
    self._block_repeats = block_repeats
    self._filter_size_scale = filter_size_scale
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    # NOTE: `self._activation` holds the callable here (unlike the blocks,
    # which store the string name).
    if activation == 'relu':
      self._activation = tf.nn.relu
    elif activation == 'swish':
      self._activation = tf.nn.swish
    else:
      raise ValueError('Activation {} not implemented.'.format(activation))
    # The stem always uses two level-2 bottleneck block groups.
    self._init_block_fn = 'bottleneck'
    self._num_init_blocks = 2
    if use_sync_bn:
      self._norm = layers.experimental.SyncBatchNormalization
    else:
      self._norm = layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

    # Build SpineNet.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])

    net = self._build_stem(inputs=inputs)
    net = self._build_scale_permuted_network(
        net=net, input_width=input_specs.shape[1])
    net = self._build_endpoints(net=net)

    super(SpineNet, self).__init__(inputs=inputs, outputs=net)

  def _block_group(self,
                   inputs,
                   filters,
                   strides,
                   block_fn_cand,
                   block_repeats=1,
                   name='block_group'):
    """Creates one group of blocks for the SpineNet model."""
    block_fn_candidates = {
        'bottleneck': nn_blocks.BottleneckBlock,
        'residual': nn_blocks.ResidualBlock,
    }
    block_fn = block_fn_candidates[block_fn_cand]
    _, _, _, num_filters = inputs.get_shape().as_list()

    # A projection shortcut is needed whenever the block changes the channel
    # count (x4 for bottleneck blocks) or the spatial resolution.
    if block_fn_cand == 'bottleneck':
      use_projection = not (num_filters == (filters * 4) and strides == 1)
    else:
      use_projection = not (num_filters == filters and strides == 1)

    x = block_fn(
        filters=filters,
        strides=strides,
        use_projection=use_projection,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon)(
            inputs)
    # Remaining repeats keep the shape, so no stride or projection.
    for _ in range(1, block_repeats):
      x = block_fn(
          filters=filters,
          strides=1,
          use_projection=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._activation,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)(
              x)
    return tf.identity(x, name=name)

  def _build_stem(self, inputs):
    """Build SpineNet stem: 7x7/2 conv + 3x3/2 maxpool + two block groups."""
    x = layers.Conv2D(
        filters=64,
        kernel_size=7,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            inputs)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(
            x)
    x = tf_utils.get_activation(self._activation)(x)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

    net = []
    # Build the initial level 2 blocks.
    for i in range(self._num_init_blocks):
      x = self._block_group(
          inputs=x,
          filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
          strides=1,
          block_fn_cand=self._init_block_fn,
          block_repeats=self._block_repeats,
          name='stem_block_{}'.format(i + 1))
      net.append(x)
    return net

  def _build_scale_permuted_network(self,
                                    net,
                                    input_width,
                                    weighted_fusion=False):
    """Build scale-permuted network.

    Args:
      net: list of stem feature tensors; grows as blocks are appended.
      input_width: `int` spatial width of the model input.
      weighted_fusion: if True, fuse parents with learned non-negative
        weights instead of a plain sum. Default False (the constructor does
        not enable it).

    Returns:
      Dict mapping output level -> feature tensor.

    Raises:
      ValueError: on duplicate output levels or an output level outside
        [min_level, max_level].
    """
    # Stem features are all at level 2 (width / 2**2).
    net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
    net_block_fns = [self._init_block_fn] * len(net)
    num_outgoing_connections = [0] * len(net)

    endpoints = {}
    for i, block_spec in enumerate(self._block_specs):
      # Find out specs for the target block.
      target_width = int(math.ceil(input_width / 2**block_spec.level))
      target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
                               self._filter_size_scale)
      target_block_fn = block_spec.block_fn

      # Resample then merge input0 and input1.
      parents = []
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      x0 = self._resample_with_alpha(
          inputs=net[input0],
          input_width=net_sizes[input0],
          input_block_fn=net_block_fns[input0],
          target_width=target_width,
          target_num_filters=target_num_filters,
          target_block_fn=target_block_fn,
          alpha=self._resample_alpha)
      parents.append(x0)
      num_outgoing_connections[input0] += 1

      x1 = self._resample_with_alpha(
          inputs=net[input1],
          input_width=net_sizes[input1],
          input_block_fn=net_block_fns[input1],
          target_width=target_width,
          target_num_filters=target_num_filters,
          target_block_fn=target_block_fn,
          alpha=self._resample_alpha)
      parents.append(x1)
      num_outgoing_connections[input1] += 1

      # Merge 0 outdegree blocks to the output block.
      if block_spec.is_output:
        for j, (j_feat,
                j_connections) in enumerate(
                    zip(net, num_outgoing_connections)):
          # Only blocks with no consumers and a matching shape are merged in.
          if j_connections == 0 and (j_feat.shape[2] == target_width and
                                     j_feat.shape[3] == x0.shape[3]):
            parents.append(j_feat)
            num_outgoing_connections[j] += 1

      # pylint: disable=g-direct-tensorflow-import
      if weighted_fusion:
        dtype = parents[0].dtype
        # One learnable weight per parent, kept non-negative via relu.
        parent_weights = [
            tf.nn.relu(
                tf.cast(
                    tf.Variable(1.0, name='block{}_fusion{}'.format(i, j)),
                    dtype=dtype)) for j in range(len(parents))
        ]
        weights_sum = tf.add_n(parent_weights)
        # Normalized weighted sum; the epsilon avoids division by zero.
        parents = [
            parents[i] * parent_weights[i] / (weights_sum + 0.0001)
            for i in range(len(parents))
        ]

      # Fuse all parent nodes then build a new block.
      x = tf_utils.get_activation(self._activation)(tf.add_n(parents))
      x = self._block_group(
          inputs=x,
          filters=target_num_filters,
          strides=1,
          block_fn_cand=target_block_fn,
          block_repeats=self._block_repeats,
          name='scale_permuted_block_{}'.format(i + 1))

      net.append(x)
      net_sizes.append(target_width)
      net_block_fns.append(target_block_fn)
      num_outgoing_connections.append(0)

      # Save output feats.
      if block_spec.is_output:
        if block_spec.level in endpoints:
          raise ValueError('Duplicate feats found for output level {}.'.format(
              block_spec.level))
        if (block_spec.level < self._min_level or
            block_spec.level > self._max_level):
          raise ValueError('Output level is out of range [{}, {}]'.format(
              self._min_level, self._max_level))
        endpoints[block_spec.level] = x

    return endpoints

  def _build_endpoints(self, net):
    """Match filter size for endpoints before sharing conv layers."""
    endpoints = {}
    for level in range(self._min_level, self._max_level + 1):
      # 1x1 conv maps every output level to `endpoints_num_filters` channels.
      x = layers.Conv2D(
          filters=self._endpoints_num_filters,
          kernel_size=1,
          strides=1,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(
              net[level])
      x = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)(
              x)
      x = tf_utils.get_activation(self._activation)(x)
      endpoints[level] = x
    return endpoints

  def _resample_with_alpha(self,
                           inputs,
                           input_width,
                           input_block_fn,
                           target_width,
                           target_num_filters,
                           target_block_fn,
                           alpha=0.5):
    """Match resolution and feature dimension.

    First squeezes channels by `alpha`, then downsamples (strided conv +
    maxpools) or upsamples (nearest-neighbor) to the target width, and
    finally expands channels with a 1x1 conv to the target filter count.
    """
    _, _, _, input_num_filters = inputs.get_shape().as_list()
    # Bottleneck blocks output 4x their nominal filter count; undo that so
    # `alpha` scales the nominal width.
    if input_block_fn == 'bottleneck':
      input_num_filters /= 4
    new_num_filters = int(input_num_filters * alpha)

    x = layers.Conv2D(
        filters=new_num_filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            inputs)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(
            x)
    x = tf_utils.get_activation(self._activation)(x)

    # Spatial resampling.
    if input_width > target_width:
      # First 2x reduction uses a strided 3x3 conv ...
      x = layers.Conv2D(
          filters=new_num_filters,
          kernel_size=3,
          strides=2,
          padding='SAME',
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(
              x)
      x = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)(
              x)
      x = tf_utils.get_activation(self._activation)(x)
      input_width /= 2
      # ... any remaining 2x reductions use max pooling.
      while input_width > target_width:
        x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
        input_width /= 2
    elif input_width < target_width:
      scale = target_width // input_width
      x = layers.UpSampling2D(size=(scale, scale))(x)

    # Last 1x1 conv to match filter size.
    if target_block_fn == 'bottleneck':
      target_num_filters *= 4
    x = layers.Conv2D(
        filters=target_num_filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(
            x)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(
            x)
    return x
class SpineNetBuilder(object):
  """SpineNet builder.

  Validates the model id, looks up the corresponding scaling parameters,
  and constructs a `SpineNet` model when called.
  """

  def __init__(self,
               model_id,
               input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640,
                                                            3]),
               min_level=3,
               max_level=7,
               block_specs=build_block_specs(),
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001):
    if model_id not in SCALING_MAP:
      raise ValueError(
          'SpineNet {} is not a valid architecture.'.format(model_id))
    scaling_params = SCALING_MAP[model_id]

    # Architecture layout.
    self._input_specs = input_specs
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    # Compound-scaling parameters derived from the model id.
    self._endpoints_num_filters = scaling_params['endpoints_num_filters']
    self._resample_alpha = scaling_params['resample_alpha']
    self._block_repeats = scaling_params['block_repeats']
    self._filter_size_scale = scaling_params['filter_size_scale']
    # Hyperparameters shared by all convolution/normalization layers.
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon

  def __call__(self, inputs, is_training=None):
    """Builds a SpineNet model and applies it to `inputs`."""
    model_kwargs = dict(
        input_specs=self._input_specs,
        min_level=self._min_level,
        max_level=self._max_level,
        block_specs=self._block_specs,
        endpoints_num_filters=self._endpoints_num_filters,
        resample_alpha=self._resample_alpha,
        block_repeats=self._block_repeats,
        filter_size_scale=self._filter_size_scale,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon)
    # The model must be instantiated inside the Keras graph context.
    with backend.get_graph().as_default():
      return SpineNet(**model_kwargs)(inputs)
official/vision/image_classification/classifier_trainer.py
View file @
657dcda5
...
...
@@ -339,7 +339,8 @@ def train_and_eval(
optimizer
=
optimizer_factory
.
build_optimizer
(
optimizer_name
=
params
.
model
.
optimizer
.
name
,
base_learning_rate
=
learning_rate
,
params
=
params
.
model
.
optimizer
.
as_dict
())
params
=
params
.
model
.
optimizer
.
as_dict
(),
model
=
model
)
metrics_map
=
_get_metrics
(
one_hot
)
metrics
=
[
metrics_map
[
metric
]
for
metric
in
params
.
train
.
metrics
]
...
...
official/vision/image_classification/optimizer_factory.py
View file @
657dcda5
...
...
@@ -18,11 +18,12 @@ from __future__ import division
# from __future__ import google_type_annotations
from
__future__
import
print_function
from
typing
import
Any
,
Dict
,
Text
,
List
from
absl
import
logging
import
tensorflow
as
tf
import
tensorflow_addons
as
tfa
from
typing
import
Any
,
Dict
,
Text
,
List
from
official.vision.image_classification
import
learning_rate
from
official.vision.image_classification.configs
import
base_configs
...
...
@@ -250,7 +251,8 @@ class MovingAverage(tf.keras.optimizers.Optimizer):
def
build_optimizer
(
optimizer_name
:
Text
,
base_learning_rate
:
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
,
params
:
Dict
[
Text
,
Any
]):
params
:
Dict
[
Text
,
Any
],
model
:
tf
.
keras
.
Model
=
None
):
"""Build the optimizer based on name.
Args:
...
...
@@ -261,6 +263,8 @@ def build_optimizer(
params: String -> Any dictionary representing the optimizer params.
This should contain optimizer specific parameters such as
`base_learning_rate`, `decay`, etc.
model: The `tf.keras.Model`. This is used for the shadow copy if using
`MovingAverage`.
Returns:
A tf.keras.Optimizer.
...
...
@@ -322,10 +326,13 @@ def build_optimizer(
# Moving average should be applied last, as it's applied at test time
moving_average_decay
=
params
.
get
(
'moving_average_decay'
,
0.
)
if
moving_average_decay
is
not
None
and
moving_average_decay
>
0.
:
if
model
is
None
:
raise
ValueError
(
'`model` must be provided if using `MovingAverage`.'
)
logging
.
info
(
'Including moving average decay.'
)
optimizer
=
MovingAverage
(
optimizer
,
optimizer
=
optimizer
,
average_decay
=
moving_average_decay
)
optimizer
.
shadow_copy
(
model
)
return
optimizer
...
...
official/vision/image_classification/optimizer_factory_test.py
View file @
657dcda5
...
...
@@ -19,15 +19,21 @@ from __future__ import division
# from __future__ import google_type_annotations
from
__future__
import
print_function
import
tensorflow
as
tf
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.image_classification
import
optimizer_factory
from
official.vision.image_classification.configs
import
base_configs
class
OptimizerFactoryTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
def
build_toy_model
(
self
)
->
tf
.
keras
.
Model
:
"""Creates a toy `tf.Keras.Model`."""
model
=
tf
.
keras
.
Sequential
()
model
.
add
(
tf
.
keras
.
layers
.
Dense
(
1
,
input_shape
=
(
1
,)))
return
model
@
parameterized
.
named_parameters
(
(
'sgd'
,
'sgd'
,
0.
,
False
),
(
'momentum'
,
'momentum'
,
0.
,
False
),
...
...
@@ -40,6 +46,7 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
(
'rmsprop_ema'
,
'rmsprop'
,
0.999
,
False
))
def
test_optimizer
(
self
,
optimizer_name
,
moving_average_decay
,
lookahead
):
"""Smoke test to be sure no syntax errors."""
model
=
self
.
build_toy_model
()
params
=
{
'learning_rate'
:
0.001
,
'rho'
:
0.09
,
...
...
@@ -51,7 +58,8 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
optimizer
=
optimizer_factory
.
build_optimizer
(
optimizer_name
=
optimizer_name
,
base_learning_rate
=
params
[
'learning_rate'
],
params
=
params
)
params
=
params
,
model
=
model
)
self
.
assertTrue
(
issubclass
(
type
(
optimizer
),
tf
.
keras
.
optimizers
.
Optimizer
))
def
test_unknown_optimizer
(
self
):
...
...
research/delf/delf/protos/delf_config.proto
View file @
657dcda5
...
...
@@ -86,6 +86,9 @@ message DelfConfig {
// Path to DELF model.
optional
string
model_path
=
1
;
// Required.
// Whether model has been exported using TF version 2+.
optional
bool
is_tf2_exported
=
10
[
default
=
false
];
// Image scales to be used.
repeated
float
image_scales
=
2
;
...
...
research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py
View file @
657dcda5
...
...
@@ -131,7 +131,7 @@ def main(argv):
delf_dataset
=
tf
.
data
.
Dataset
.
from_tensor_slices
((
features_placeholder
))
delf_dataset
=
delf_dataset
.
shuffle
(
1000
).
batch
(
features_for_clustering
.
shape
[
0
])
iterator
=
delf_
data
set
.
make_initializable_iterator
()
iterator
=
tf
.
compat
.
v1
.
data
.
make_initializable_iterator
(
delf_dataset
)
def
_initializer_fn
(
sess
):
"""Initialize dataset iterator, feed in the data."""
...
...
research/delf/delf/python/examples/extractor.py
View file @
657dcda5
...
...
@@ -102,7 +102,15 @@ def MakeExtractor(config):
Returns:
Function that receives an image and returns features.
Raises:
ValueError: if config is invalid.
"""
# Assert the configuration
if
config
.
use_global_features
and
hasattr
(
config
,
'is_tf2_exported'
)
and
config
.
is_tf2_exported
:
raise
ValueError
(
'use_global_features is incompatible with is_tf2_exported'
)
# Load model.
model
=
tf
.
saved_model
.
load
(
config
.
model_path
)
...
...
@@ -178,7 +186,8 @@ def MakeExtractor(config):
else
:
global_pca_parameters
[
'variances'
]
=
None
model
=
model
.
prune
(
feeds
=
feeds
,
fetches
=
fetches
)
if
not
hasattr
(
config
,
'is_tf2_exported'
)
or
not
config
.
is_tf2_exported
:
model
=
model
.
prune
(
feeds
=
feeds
,
fetches
=
fetches
)
def
ExtractorFn
(
image
,
resize_factor
=
1.0
):
"""Receives an image and returns DELF global and/or local features.
...
...
@@ -197,7 +206,6 @@ def MakeExtractor(config):
features (key 'local_features' mapping to a dict with keys 'locations',
'descriptors', 'scales', 'attention').
"""
resized_image
,
scale_factors
=
ResizeImage
(
image
,
config
,
resize_factor
=
resize_factor
)
...
...
@@ -224,8 +232,20 @@ def MakeExtractor(config):
output
=
None
if
config
.
use_local_features
:
output
=
model
(
image_tensor
,
image_scales_tensor
,
score_threshold_tensor
,
max_feature_num_tensor
)
if
hasattr
(
config
,
'is_tf2_exported'
)
and
config
.
is_tf2_exported
:
predict
=
model
.
signatures
[
'serving_default'
]
output_dict
=
predict
(
input_image
=
image_tensor
,
input_scales
=
image_scales_tensor
,
input_max_feature_num
=
max_feature_num_tensor
,
input_abs_thres
=
score_threshold_tensor
)
output
=
[
output_dict
[
'boxes'
],
output_dict
[
'features'
],
output_dict
[
'scales'
],
output_dict
[
'scores'
]
]
else
:
output
=
model
(
image_tensor
,
image_scales_tensor
,
score_threshold_tensor
,
max_feature_num_tensor
)
else
:
output
=
model
(
image_tensor
,
image_scales_tensor
)
...
...
research/delf/delf/python/feature_aggregation_extractor.py
View file @
657dcda5
...
...
@@ -269,8 +269,7 @@ class ExtractAggregatedRepresentation(object):
axis
=
0
),
[
num_assignments
,
1
])
-
tf
.
gather
(
codebook
,
selected_visual_words
[
ind
])
return
ind
+
1
,
tf
.
tensor_scatter_nd_add
(
vlad
,
tf
.
expand_dims
(
selected_visual_words
[
ind
],
axis
=
1
),
tf
.
cast
(
diff
,
dtype
=
tf
.
float32
))
vlad
,
tf
.
expand_dims
(
selected_visual_words
[
ind
],
axis
=
1
),
diff
)
ind_vlad
=
tf
.
constant
(
0
,
dtype
=
tf
.
int32
)
keep_going
=
lambda
j
,
vlad
:
tf
.
less
(
j
,
num_features
)
...
...
@@ -396,9 +395,7 @@ class ExtractAggregatedRepresentation(object):
visual_words
=
tf
.
reshape
(
tf
.
where
(
tf
.
greater
(
per_centroid_norms
,
tf
.
cast
(
tf
.
sqrt
(
_NORM_SQUARED_TOLERANCE
),
dtype
=
tf
.
float32
))),
tf
.
greater
(
per_centroid_norms
,
tf
.
sqrt
(
_NORM_SQUARED_TOLERANCE
))),
[
-
1
])
per_centroid_normalized_vector
=
tf
.
math
.
l2_normalize
(
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment