ModelZoo / ResNet50_tensorflow / Commits / 32e4ca51

Commit 32e4ca51, authored Nov 28, 2023 by qianyj
Update code to v2.11.0
Parents: 9485aa1d, 71060f67

Changes: 775. Showing 20 changed files with 1098 additions and 15 deletions (+1098, -15).
official/projects/detr/ops/matchers_test.py (+95, -0)
official/projects/detr/optimization.py (+147, -0)
official/projects/detr/tasks/detection.py (+402, -0)
official/projects/detr/tasks/detection_test.py (+203, -0)
official/projects/detr/train.py (+70, -0)
official/projects/edgetpu/nlp/__init__.py (+1, -1)
official/projects/edgetpu/nlp/configs/__init__.py (+1, -1)
official/projects/edgetpu/nlp/configs/params.py (+1, -1)
official/projects/edgetpu/nlp/experiments/downstream_tasks/mobilebert_baseline.yaml (+1, -1)
official/projects/edgetpu/nlp/experiments/downstream_tasks/mobilebert_edgetpu_xxs.yaml (+23, -0)
official/projects/edgetpu/nlp/experiments/mobilebert_edgetpu_xxs.yaml (+142, -0)
official/projects/edgetpu/nlp/mobilebert_edgetpu_trainer.py (+1, -1)
official/projects/edgetpu/nlp/mobilebert_edgetpu_trainer_test.py (+1, -1)
official/projects/edgetpu/nlp/modeling/__init__.py (+1, -1)
official/projects/edgetpu/nlp/modeling/edgetpu_layers.py (+2, -2)
official/projects/edgetpu/nlp/modeling/edgetpu_layers_test.py (+1, -1)
official/projects/edgetpu/nlp/modeling/encoder.py (+2, -2)
official/projects/edgetpu/nlp/modeling/model_builder.py (+2, -1)
official/projects/edgetpu/nlp/modeling/model_builder_test.py (+1, -1)
official/projects/edgetpu/nlp/modeling/pretrainer.py (+1, -1)
Too many changes to show. To preserve performance, only 775 of 775+ files are displayed.
official/projects/detr/ops/matchers_test.py — new file (mode 100644)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for tensorflow_models.official.projects.detr.ops.matchers."""

import numpy as np
from scipy import optimize
import tensorflow as tf

from official.projects.detr.ops import matchers


class MatchersOpsTest(tf.test.TestCase):

  def testLinearSumAssignment(self):
    """Check a simple 2D test case of the Linear Sum Assignment problem.

    Ensures that the implementation of the matching algorithm is correct
    and functional on TPUs.
    """
    cost_matrix = np.array([[[4, 1, 3], [2, 0, 5], [3, 2, 2]]],
                           dtype=np.float32)
    _, adjacency_matrix = matchers.hungarian_matching(tf.constant(cost_matrix))
    adjacency_output = adjacency_matrix.numpy()

    correct_output = np.array([
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
    ], dtype=bool)
    self.assertAllEqual(adjacency_output[0], correct_output)

  def testBatchedLinearSumAssignment(self):
    """Check a batched case of the Linear Sum Assignment Problem.

    Ensures that a correct solution is found for all inputted problems within
    a batch.
    """
    cost_matrix = np.array([
        [[4, 1, 3], [2, 0, 5], [3, 2, 2]],
        [[1, 4, 3], [0, 2, 5], [2, 3, 2]],
        [[1, 3, 4], [0, 5, 2], [2, 2, 3]],
    ], dtype=np.float32)
    _, adjacency_matrix = matchers.hungarian_matching(tf.constant(cost_matrix))
    adjacency_output = adjacency_matrix.numpy()

    # Hand-solved correct output for the linear sum assignment problem.
    correct_output = np.array([
        [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
        [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
        [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
    ], dtype=bool)
    self.assertAllClose(adjacency_output, correct_output)

  def testMaximumBipartiteMatching(self):
    """Check that the maximum bipartite match assigns the correct numbers."""
    adj_matrix = tf.cast([[
        [1, 0, 0, 0, 1],
        [0, 1, 0, 1, 0],
        [0, 0, 1, 0, 0],
        [0, 1, 0, 0, 0],
        [1, 0, 0, 0, 0],
    ]], tf.bool)
    _, assignment = matchers._maximum_bipartite_matching(adj_matrix)
    self.assertEqual(np.sum(assignment.numpy()), 5)

  def testAssignmentMatchesScipy(self):
    """Check that the Linear Sum Assignment matches the Scipy implementation."""
    batch_size, num_elems = 2, 25
    weights = tf.random.uniform((batch_size, num_elems, num_elems),
                                minval=0., maxval=1.)
    weights, assignment = matchers.hungarian_matching(weights)
    for idx in range(batch_size):
      _, scipy_assignment = optimize.linear_sum_assignment(
          weights.numpy()[idx])
      hungarian_assignment = np.where(assignment.numpy()[idx])[1]
      self.assertAllEqual(hungarian_assignment, scipy_assignment)


if __name__ == '__main__':
  tf.test.main()
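For reference, the hand-solved 3x3 case in testLinearSumAssignment can be cross-checked with SciPy alone. A minimal sketch, independent of the TensorFlow implementation: optimize.linear_sum_assignment returns the column chosen for each row, which corresponds to the positions of the ones in correct_output.

import numpy as np
from scipy import optimize

# Same cost matrix as testLinearSumAssignment, without the batch dimension.
cost = np.array([[4, 1, 3],
                 [2, 0, 5],
                 [3, 2, 2]], dtype=np.float32)
row_ind, col_ind = optimize.linear_sum_assignment(cost)
print(col_ind)                        # [1 0 2] -> rows of correct_output above
print(cost[row_ind, col_ind].sum())   # 5.0, the minimal total cost (1 + 2 + 2)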
official/projects/detr/optimization.py — new file (mode 100644)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Customized optimizer to match paper results."""

import dataclasses

import tensorflow as tf

from official.modeling import optimization
from official.nlp import optimization as nlp_optimization


@dataclasses.dataclass
class DETRAdamWConfig(optimization.AdamWeightDecayConfig):
  pass


@dataclasses.dataclass
class OptimizerConfig(optimization.OptimizerConfig):
  detr_adamw: DETRAdamWConfig = DETRAdamWConfig()


@dataclasses.dataclass
class OptimizationConfig(optimization.OptimizationConfig):
  """Configuration for optimizer and learning rate schedule.

  Attributes:
    optimizer: optimizer oneof config.
    ema: optional exponential moving average optimizer config, if specified,
      ema optimizer will be used.
    learning_rate: learning rate oneof config.
    warmup: warmup oneof config.
  """
  optimizer: OptimizerConfig = OptimizerConfig()


# TODO(frederickliu): figure out how to make this configurable.
# TODO(frederickliu): Study if this is needed.
class _DETRAdamW(nlp_optimization.AdamWeightDecay):
  """Custom AdamW to support different lr scaling for backbone.

  The code is copied from AdamWeightDecay and Adam with learning scaling.
  """

  def _resource_apply_dense(self, grad, var, apply_state=None):
    lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
    apply_state = kwargs['apply_state']
    if 'detr' not in var.name:
      lr_t *= 0.1
    decay = self._decay_weights_op(var, lr_t, apply_state)
    with tf.control_dependencies([decay]):
      var_device, var_dtype = var.device, var.dtype.base_dtype
      coefficients = ((apply_state or {}).get((var_device, var_dtype))
                      or self._fallback_apply_state(var_device, var_dtype))
      m = self.get_slot(var, 'm')
      v = self.get_slot(var, 'v')
      lr = (coefficients['lr_t'] * 0.1
            if 'detr' not in var.name else coefficients['lr_t'])
      if not self.amsgrad:
        return tf.raw_ops.ResourceApplyAdam(
            var=var.handle,
            m=m.handle,
            v=v.handle,
            beta1_power=coefficients['beta_1_power'],
            beta2_power=coefficients['beta_2_power'],
            lr=lr,
            beta1=coefficients['beta_1_t'],
            beta2=coefficients['beta_2_t'],
            epsilon=coefficients['epsilon'],
            grad=grad,
            use_locking=self._use_locking)
      else:
        vhat = self.get_slot(var, 'vhat')
        return tf.raw_ops.ResourceApplyAdamWithAmsgrad(
            var=var.handle,
            m=m.handle,
            v=v.handle,
            vhat=vhat.handle,
            beta1_power=coefficients['beta_1_power'],
            beta2_power=coefficients['beta_2_power'],
            lr=lr,
            beta1=coefficients['beta_1_t'],
            beta2=coefficients['beta_2_t'],
            epsilon=coefficients['epsilon'],
            grad=grad,
            use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
    lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
    apply_state = kwargs['apply_state']
    if 'detr' not in var.name:
      lr_t *= 0.1
    decay = self._decay_weights_op(var, lr_t, apply_state)
    with tf.control_dependencies([decay]):
      var_device, var_dtype = var.device, var.dtype.base_dtype
      coefficients = ((apply_state or {}).get((var_device, var_dtype))
                      or self._fallback_apply_state(var_device, var_dtype))
      # m_t = beta1 * m + (1 - beta1) * g_t
      m = self.get_slot(var, 'm')
      m_scaled_g_values = grad * coefficients['one_minus_beta_1_t']
      m_t = tf.compat.v1.assign(m, m * coefficients['beta_1_t'],
                                use_locking=self._use_locking)
      with tf.control_dependencies([m_t]):
        m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)
      # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
      v = self.get_slot(var, 'v')
      v_scaled_g_values = (grad * grad) * coefficients['one_minus_beta_2_t']
      v_t = tf.compat.v1.assign(v, v * coefficients['beta_2_t'],
                                use_locking=self._use_locking)
      with tf.control_dependencies([v_t]):
        v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)
      lr = (coefficients['lr_t'] * 0.1
            if 'detr' not in var.name else coefficients['lr_t'])
      if not self.amsgrad:
        v_sqrt = tf.sqrt(v_t)
        var_update = tf.compat.v1.assign_sub(
            var, lr * m_t / (v_sqrt + coefficients['epsilon']),
            use_locking=self._use_locking)
        return tf.group(*[var_update, m_t, v_t])
      else:
        v_hat = self.get_slot(var, 'vhat')
        v_hat_t = tf.maximum(v_hat, v_t)
        with tf.control_dependencies([v_hat_t]):
          v_hat_t = tf.compat.v1.assign(v_hat, v_hat_t,
                                        use_locking=self._use_locking)
        v_hat_sqrt = tf.sqrt(v_hat_t)
        var_update = tf.compat.v1.assign_sub(
            var, lr * m_t / (v_hat_sqrt + coefficients['epsilon']),
            use_locking=self._use_locking)
        return tf.group(*[var_update, m_t, v_t, v_hat_t])


optimization.register_optimizer_cls('detr_adamw', _DETRAdamW)
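A minimal sketch of the name-based learning-rate rule _DETRAdamW applies (the variable names below are made up for illustration): any variable whose name does not contain 'detr', i.e. the pretrained backbone, is updated with one tenth of the scheduled learning rate, while DETR head variables use it unchanged.

base_lr = 1e-4  # stands in for coefficients['lr_t'] at some training step
for name in ('resnet/conv2d/kernel',            # backbone variable (assumed name)
             'detr/transformer/dense/kernel'):  # DETR head variable (assumed name)
  lr = base_lr * 0.1 if 'detr' not in name else base_lr
  print(f'{name}: lr={lr}')  # backbone gets ~1e-5, the head keeps 1e-4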
official/projects/detr/tasks/detection.py — new file (mode 100644)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""DETR detection task definition."""

from typing import Optional

from absl import logging
import tensorflow as tf

from official.common import dataset_fn
from official.core import base_task
from official.core import task_factory
from official.projects.detr.configs import detr as detr_cfg
from official.projects.detr.dataloaders import coco
from official.projects.detr.dataloaders import detr_input
from official.projects.detr.modeling import detr
from official.projects.detr.ops import matchers
from official.vision.dataloaders import input_reader_factory
from official.vision.dataloaders import tf_example_decoder
from official.vision.dataloaders import tfds_factory
from official.vision.dataloaders import tf_example_label_map_decoder
from official.vision.evaluation import coco_evaluator
from official.vision.modeling import backbones
from official.vision.ops import box_ops


@task_factory.register_task_cls(detr_cfg.DetrTask)
class DetectionTask(base_task.Task):
  """A single-replica view of training procedure.

  DETR task provides artifacts for training/evaluation procedures, including
  loading/iterating over Datasets, initializing the model, calculating the
  loss, post-processing, and customized metrics with reduction.
  """

  def build_model(self):
    """Build DETR model."""
    input_specs = tf.keras.layers.InputSpec(
        shape=[None] + self._task_config.model.input_size)

    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=self._task_config.model.backbone,
        norm_activation_config=self._task_config.model.norm_activation)

    model = detr.DETR(backbone,
                      self._task_config.model.backbone_endpoint_name,
                      self._task_config.model.num_queries,
                      self._task_config.model.hidden_size,
                      self._task_config.model.num_classes,
                      self._task_config.model.num_encoder_layers,
                      self._task_config.model.num_decoder_layers)
    return model

  def initialize(self, model: tf.keras.Model):
    """Loading pretrained checkpoint."""
    if not self._task_config.init_checkpoint:
      return

    ckpt_dir_or_file = self._task_config.init_checkpoint

    # Restoring checkpoint.
    if tf.io.gfile.isdir(ckpt_dir_or_file):
      ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)

    if self._task_config.init_checkpoint_modules == 'all':
      ckpt = tf.train.Checkpoint(**model.checkpoint_items)
      status = ckpt.restore(ckpt_dir_or_file)
      status.assert_consumed()
    elif self._task_config.init_checkpoint_modules == 'backbone':
      ckpt = tf.train.Checkpoint(backbone=model.backbone)
      status = ckpt.restore(ckpt_dir_or_file)
      status.expect_partial().assert_existing_objects_matched()

    logging.info('Finished loading pretrained checkpoint from %s',
                 ckpt_dir_or_file)

  def build_inputs(self,
                   params,
                   input_context: Optional[tf.distribute.InputContext] = None):
    """Build input dataset."""
    if isinstance(params, coco.COCODataConfig):
      dataset = coco.COCODataLoader(params).load(input_context)
    else:
      if params.tfds_name:
        decoder = tfds_factory.get_detection_decoder(params.tfds_name)
      else:
        decoder_cfg = params.decoder.get()
        if params.decoder.type == 'simple_decoder':
          decoder = tf_example_decoder.TfExampleDecoder(
              regenerate_source_id=decoder_cfg.regenerate_source_id)
        elif params.decoder.type == 'label_map_decoder':
          decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
              label_map=decoder_cfg.label_map,
              regenerate_source_id=decoder_cfg.regenerate_source_id)
        else:
          raise ValueError('Unknown decoder type: {}!'.format(
              params.decoder.type))

      parser = detr_input.Parser(
          class_offset=self._task_config.losses.class_offset,
          output_size=self._task_config.model.input_size[:2],
      )

      reader = input_reader_factory.input_reader_generator(
          params,
          dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
          decoder_fn=decoder.decode,
          parser_fn=parser.parse_fn(params.is_training))
      dataset = reader.read(input_context=input_context)

    return dataset

  def _compute_cost(self, cls_outputs, box_outputs, cls_targets, box_targets):
    # Approximate classification cost with 1 - prob[target class].
    # The 1 is a constant that doesn't change the matching, it can be omitted.
    # background: 0
    cls_cost = self._task_config.losses.lambda_cls * tf.gather(
        -tf.nn.softmax(cls_outputs), cls_targets, batch_dims=1, axis=-1)

    # Compute the L1 cost between boxes.
    paired_differences = self._task_config.losses.lambda_box * tf.abs(
        tf.expand_dims(box_outputs, 2) - tf.expand_dims(box_targets, 1))
    box_cost = tf.reduce_sum(paired_differences, axis=-1)

    # Compute the giou cost between boxes.
    giou_cost = self._task_config.losses.lambda_giou * -box_ops.bbox_generalized_overlap(
        box_ops.cycxhw_to_yxyx(box_outputs),
        box_ops.cycxhw_to_yxyx(box_targets))

    total_cost = cls_cost + box_cost + giou_cost

    max_cost = (
        self._task_config.losses.lambda_cls * 0.0 +
        self._task_config.losses.lambda_box * 4. +
        self._task_config.losses.lambda_giou * 0.0)

    # Set pads to large constant.
    valid = tf.expand_dims(
        tf.cast(tf.not_equal(cls_targets, 0), dtype=total_cost.dtype), axis=1)
    total_cost = (1 - valid) * max_cost + valid * total_cost

    # Set inf or nan to large constant.
    total_cost = tf.where(
        tf.logical_or(tf.math.is_nan(total_cost), tf.math.is_inf(total_cost)),
        max_cost * tf.ones_like(total_cost, dtype=total_cost.dtype),
        total_cost)

    return total_cost

  def build_losses(self, outputs, labels, aux_losses=None):
    """Build DETR losses."""
    cls_outputs = outputs['cls_outputs']
    box_outputs = outputs['box_outputs']
    cls_targets = labels['classes']
    box_targets = labels['boxes']

    cost = self._compute_cost(cls_outputs, box_outputs, cls_targets,
                              box_targets)

    _, indices = matchers.hungarian_matching(cost)
    indices = tf.stop_gradient(indices)

    target_index = tf.math.argmax(indices, axis=1)
    cls_assigned = tf.gather(cls_outputs, target_index, batch_dims=1, axis=1)
    box_assigned = tf.gather(box_outputs, target_index, batch_dims=1, axis=1)

    background = tf.equal(cls_targets, 0)
    num_boxes = tf.reduce_sum(
        tf.cast(tf.logical_not(background), tf.float32), axis=-1)

    # Down-weight background to account for class imbalance.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=cls_targets, logits=cls_assigned)
    cls_loss = self._task_config.losses.lambda_cls * tf.where(
        background, self._task_config.losses.background_cls_weight * xentropy,
        xentropy)
    cls_weights = tf.where(
        background,
        self._task_config.losses.background_cls_weight *
        tf.ones_like(cls_loss),
        tf.ones_like(cls_loss))

    # Box loss is only calculated on non-background class.
    l_1 = tf.reduce_sum(tf.abs(box_assigned - box_targets), axis=-1)
    box_loss = self._task_config.losses.lambda_box * tf.where(
        background, tf.zeros_like(l_1), l_1)

    # Giou loss is only calculated on non-background class.
    giou = tf.linalg.diag_part(1.0 - box_ops.bbox_generalized_overlap(
        box_ops.cycxhw_to_yxyx(box_assigned),
        box_ops.cycxhw_to_yxyx(box_targets)))
    giou_loss = self._task_config.losses.lambda_giou * tf.where(
        background, tf.zeros_like(giou), giou)

    # Consider doing all reduce once in train_step to speed up.
    num_boxes_per_replica = tf.reduce_sum(num_boxes)
    cls_weights_per_replica = tf.reduce_sum(cls_weights)
    replica_context = tf.distribute.get_replica_context()
    num_boxes_sum, cls_weights_sum = replica_context.all_reduce(
        tf.distribute.ReduceOp.SUM,
        [num_boxes_per_replica, cls_weights_per_replica])
    cls_loss = tf.math.divide_no_nan(tf.reduce_sum(cls_loss), cls_weights_sum)
    box_loss = tf.math.divide_no_nan(tf.reduce_sum(box_loss), num_boxes_sum)
    giou_loss = tf.math.divide_no_nan(tf.reduce_sum(giou_loss), num_boxes_sum)

    aux_losses = tf.add_n(aux_losses) if aux_losses else 0.0

    total_loss = cls_loss + box_loss + giou_loss + aux_losses
    return total_loss, cls_loss, box_loss, giou_loss

  def build_metrics(self, training=True):
    """Build detection metrics."""
    metrics = []
    metric_names = ['cls_loss', 'box_loss', 'giou_loss']
    for name in metric_names:
      metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))

    if not training:
      self.coco_metric = coco_evaluator.COCOEvaluator(
          annotation_file=self._task_config.annotation_file,
          include_mask=False,
          need_rescale_bboxes=True,
          per_category_metrics=self._task_config.per_category_metrics)
    return metrics

  def train_step(self, inputs, model, optimizer, metrics=None):
    """Does forward and backward.

    Args:
      inputs: a dictionary of input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs
    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      loss = 0.0
      cls_loss = 0.0
      box_loss = 0.0
      giou_loss = 0.0
      for output in outputs:
        # Computes per-replica loss.
        layer_loss, layer_cls_loss, layer_box_loss, layer_giou_loss = self.build_losses(
            outputs=output, labels=labels, aux_losses=model.losses)
        loss += layer_loss
        cls_loss += layer_cls_loss
        box_loss += layer_box_loss
        giou_loss += layer_giou_loss

      # Consider moving scaling logic from build_losses to here.
      scaled_loss = loss
      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient when LossScaleOptimizer is used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))

    # Multiply for logging.
    # Since we expect the gradient replica sum to happen in the optimizer,
    # the loss is scaled with global num_boxes and weights.
    # To have it more interpretable/comparable we scale it back when logging.
    num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync
    loss *= num_replicas_in_sync
    cls_loss *= num_replicas_in_sync
    box_loss *= num_replicas_in_sync
    giou_loss *= num_replicas_in_sync

    # Trainer class handles loss metric for you.
    logs = {self.loss: loss}

    all_losses = {
        'cls_loss': cls_loss,
        'box_loss': box_loss,
        'giou_loss': giou_loss,
    }

    # Metric results will be added to logs for you.
    if metrics:
      for m in metrics:
        m.update_state(all_losses[m.name])
    return logs

  def validation_step(self, inputs, model, metrics=None):
    """Validation step.

    Args:
      inputs: a dictionary of input tensors.
      model: the keras.Model.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs

    outputs = model(features, training=False)[-1]
    loss, cls_loss, box_loss, giou_loss = self.build_losses(
        outputs=outputs, labels=labels, aux_losses=model.losses)

    # Multiply for logging.
    # Since we expect the gradient replica sum to happen in the optimizer,
    # the loss is scaled with global num_boxes and weights.
    # To have it more interpretable/comparable we scale it back when logging.
    num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync
    loss *= num_replicas_in_sync
    cls_loss *= num_replicas_in_sync
    box_loss *= num_replicas_in_sync
    giou_loss *= num_replicas_in_sync

    # Evaluator class handles loss metric for you.
    logs = {self.loss: loss}

    predictions = {
        'detection_boxes':
            box_ops.cycxhw_to_yxyx(outputs['box_outputs']) * tf.expand_dims(
                tf.concat([
                    labels['image_info'][:, 1:2, 0],
                    labels['image_info'][:, 1:2, 1],
                    labels['image_info'][:, 1:2, 0],
                    labels['image_info'][:, 1:2, 1]
                ], axis=1),
                axis=1),
        'detection_scores':
            tf.math.reduce_max(
                tf.nn.softmax(outputs['cls_outputs'])[:, :, 1:], axis=-1),
        'detection_classes':
            tf.math.argmax(outputs['cls_outputs'][:, :, 1:], axis=-1) + 1,
        # Fix this. It's not being used at the moment.
        'num_detections':
            tf.reduce_sum(
                tf.cast(
                    tf.math.greater(
                        tf.math.reduce_max(outputs['cls_outputs'], axis=-1),
                        0), tf.int32),
                axis=-1),
        'source_id': labels['id'],
        'image_info': labels['image_info']
    }

    ground_truths = {
        'source_id': labels['id'],
        'height': labels['image_info'][:, 0:1, 0],
        'width': labels['image_info'][:, 0:1, 1],
        'num_detections':
            tf.reduce_sum(
                tf.cast(tf.math.greater(labels['classes'], 0), tf.int32),
                axis=-1),
        'boxes': labels['gt_boxes'],
        'classes': labels['classes'],
        'is_crowds': labels['is_crowd']
    }
    logs.update({'predictions': predictions, 'ground_truths': ground_truths})

    all_losses = {
        'cls_loss': cls_loss,
        'box_loss': box_loss,
        'giou_loss': giou_loss,
    }

    # Metric results will be added to logs for you.
    if metrics:
      for m in metrics:
        m.update_state(all_losses[m.name])
    return logs

  def aggregate_logs(self, state=None, step_outputs=None):
    if state is None:
      self.coco_metric.reset_states()
      state = self.coco_metric
    state.update_state(step_outputs['ground_truths'],
                       step_outputs['predictions'])
    return state

  def reduce_aggregated_logs(self, aggregated_logs, global_step=None):
    return aggregated_logs.result()
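The pairwise box cost in _compute_cost relies on a broadcasting trick: expanding predictions on axis 2 and targets on axis 1 yields every prediction/target pair, so summing |diff| over the last axis gives the L1 matching cost. A minimal numpy sketch of the same computation, with shapes assumed for illustration:

import numpy as np

box_outputs = np.random.rand(1, 5, 4)   # (batch, num_queries, 4), assumed
box_targets = np.random.rand(1, 3, 4)   # (batch, num_targets, 4), assumed
# Equivalent to tf.expand_dims(box_outputs, 2) - tf.expand_dims(box_targets, 1).
paired = np.abs(box_outputs[:, :, None, :] - box_targets[:, None, :, :])
box_cost = paired.sum(axis=-1)           # (1, 5, 3): cost of query i vs target j
print(box_cost.shape)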
official/projects/detr/tasks/detection_test.py — new file (mode 100644)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for detection."""

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

from official.projects.detr import optimization
from official.projects.detr.configs import detr as detr_cfg
from official.projects.detr.dataloaders import coco
from official.projects.detr.tasks import detection
from official.vision.configs import backbones

_NUM_EXAMPLES = 10


def _gen_fn():
  h = np.random.randint(0, 300)
  w = np.random.randint(0, 300)
  num_boxes = np.random.randint(0, 50)
  return {
      'image': np.ones(shape=(h, w, 3), dtype=np.uint8),
      'image/id': np.random.randint(0, 100),
      'image/filename': 'test',
      'objects': {
          'is_crowd': np.ones(shape=(num_boxes), dtype=np.bool),
          'bbox': np.ones(shape=(num_boxes, 4), dtype=np.float32),
          'label': np.ones(shape=(num_boxes), dtype=np.int64),
          'id': np.ones(shape=(num_boxes), dtype=np.int64),
          'area': np.ones(shape=(num_boxes), dtype=np.int64),
      }
  }


def _as_dataset(self, *args, **kwargs):
  del args
  del kwargs
  return tf.data.Dataset.from_generator(
      lambda: (_gen_fn() for i in range(_NUM_EXAMPLES)),
      output_types=self.info.features.dtype,
      output_shapes=self.info.features.shape,
  )


class DetectionTest(tf.test.TestCase):

  def test_train_step(self):
    config = detr_cfg.DetrTask(
        model=detr_cfg.Detr(
            input_size=[1333, 1333, 3],
            num_encoder_layers=1,
            num_decoder_layers=1,
            num_classes=81,
            backbone=backbones.Backbone(
                type='resnet',
                resnet=backbones.ResNet(model_id=10, bn_trainable=False))),
        train_data=coco.COCODataConfig(
            tfds_name='coco/2017',
            tfds_split='validation',
            is_training=True,
            global_batch_size=2,
        ))
    with tfds.testing.mock_data(as_dataset_fn=_as_dataset):
      task = detection.DetectionTask(config)
      model = task.build_model()
      dataset = task.build_inputs(config.train_data)
      iterator = iter(dataset)
      opt_cfg = optimization.OptimizationConfig({
          'optimizer': {
              'type': 'detr_adamw',
              'detr_adamw': {
                  'weight_decay_rate': 1e-4,
                  'global_clipnorm': 0.1,
              }
          },
          'learning_rate': {
              'type': 'stepwise',
              'stepwise': {
                  'boundaries': [120000],
                  'values': [0.0001, 1.0e-05]
              }
          },
      })
      optimizer = detection.DetectionTask.create_optimizer(opt_cfg)
      task.train_step(next(iterator), model, optimizer)

  def test_validation_step(self):
    config = detr_cfg.DetrTask(
        model=detr_cfg.Detr(
            input_size=[1333, 1333, 3],
            num_encoder_layers=1,
            num_decoder_layers=1,
            num_classes=81,
            backbone=backbones.Backbone(
                type='resnet',
                resnet=backbones.ResNet(model_id=10, bn_trainable=False))),
        validation_data=coco.COCODataConfig(
            tfds_name='coco/2017',
            tfds_split='validation',
            is_training=False,
            global_batch_size=2,
        ))
    with tfds.testing.mock_data(as_dataset_fn=_as_dataset):
      task = detection.DetectionTask(config)
      model = task.build_model()
      metrics = task.build_metrics(training=False)
      dataset = task.build_inputs(config.validation_data)
      iterator = iter(dataset)
      logs = task.validation_step(next(iterator), model, metrics)
      state = task.aggregate_logs(step_outputs=logs)
      task.reduce_aggregated_logs(state)


class DetectionTFDSTest(tf.test.TestCase):

  def test_train_step(self):
    config = detr_cfg.DetrTask(
        model=detr_cfg.Detr(
            input_size=[1333, 1333, 3],
            num_encoder_layers=1,
            num_decoder_layers=1,
            backbone=backbones.Backbone(
                type='resnet',
                resnet=backbones.ResNet(model_id=10, bn_trainable=False))),
        losses=detr_cfg.Losses(class_offset=1),
        train_data=detr_cfg.DataConfig(
            tfds_name='coco/2017',
            tfds_split='validation',
            is_training=True,
            global_batch_size=2,
        ))
    with tfds.testing.mock_data(as_dataset_fn=_as_dataset):
      task = detection.DetectionTask(config)
      model = task.build_model()
      dataset = task.build_inputs(config.train_data)
      iterator = iter(dataset)
      opt_cfg = optimization.OptimizationConfig({
          'optimizer': {
              'type': 'detr_adamw',
              'detr_adamw': {
                  'weight_decay_rate': 1e-4,
                  'global_clipnorm': 0.1,
              }
          },
          'learning_rate': {
              'type': 'stepwise',
              'stepwise': {
                  'boundaries': [120000],
                  'values': [0.0001, 1.0e-05]
              }
          },
      })
      optimizer = detection.DetectionTask.create_optimizer(opt_cfg)
      task.train_step(next(iterator), model, optimizer)

  def test_validation_step(self):
    config = detr_cfg.DetrTask(
        model=detr_cfg.Detr(
            input_size=[1333, 1333, 3],
            num_encoder_layers=1,
            num_decoder_layers=1,
            backbone=backbones.Backbone(
                type='resnet',
                resnet=backbones.ResNet(model_id=10, bn_trainable=False))),
        losses=detr_cfg.Losses(class_offset=1),
        validation_data=detr_cfg.DataConfig(
            tfds_name='coco/2017',
            tfds_split='validation',
            is_training=False,
            global_batch_size=2,
        ))
    with tfds.testing.mock_data(as_dataset_fn=_as_dataset):
      task = detection.DetectionTask(config)
      model = task.build_model()
      metrics = task.build_metrics(training=False)
      dataset = task.build_inputs(config.validation_data)
      iterator = iter(dataset)
      logs = task.validation_step(next(iterator), model, metrics)
      state = task.aggregate_logs(step_outputs=logs)
      task.reduce_aggregated_logs(state)


if __name__ == '__main__':
  tf.test.main()
official/projects/detr/train.py — new file (mode 100644)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""TensorFlow Model Garden Vision training driver."""

from absl import app
from absl import flags
import gin

from official.common import distribute_utils
from official.common import flags as tfm_flags
from official.core import task_factory
from official.core import train_lib
from official.core import train_utils
from official.modeling import performance
# pylint: disable=unused-import
from official.projects.detr.configs import detr
from official.projects.detr.tasks import detection
# pylint: enable=unused-import

FLAGS = flags.FLAGS


def main(_):
  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
  params = train_utils.parse_configuration(FLAGS)
  model_dir = FLAGS.model_dir
  if 'train' in FLAGS.mode:
    # Pure eval modes do not output yaml files. Otherwise continuous eval job
    # may race against the train job for writing the same file.
    train_utils.serialize_config(params, model_dir)

  # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16'
  # can have significant impact on model speeds by utilizing float16 in case
  # of GPUs, and bfloat16 in the case of TPUs. loss_scale takes effect only
  # when dtype is float16.
  if params.runtime.mixed_precision_dtype:
    performance.set_mixed_precision_policy(
        params.runtime.mixed_precision_dtype)
  distribution_strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=params.runtime.distribution_strategy,
      all_reduce_alg=params.runtime.all_reduce_alg,
      num_gpus=params.runtime.num_gpus,
      tpu_address=params.runtime.tpu)
  with distribution_strategy.scope():
    task = task_factory.get_task(params.task, logging_dir=model_dir)

  train_lib.run_experiment(
      distribution_strategy=distribution_strategy,
      task=task,
      mode=FLAGS.mode,
      params=params,
      model_dir=model_dir)

  train_utils.save_gin_config(FLAGS.mode, model_dir)


if __name__ == '__main__':
  tfm_flags.define_flags()
  flags.mark_flags_as_required(['experiment', 'mode', 'model_dir'])
  app.run(main)
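The driver marks --experiment, --mode and --model_dir as required. A hypothetical launch from Python (the experiment name and output path below are placeholders, not taken from this commit; running it requires the Model Garden on the Python path):

import subprocess

subprocess.run([
    'python3', '-m', 'official.projects.detr.train',
    '--experiment=detr_coco',           # assumed registered experiment name
    '--mode=train_and_eval',
    '--model_dir=/tmp/detr_model_dir',  # placeholder output directory
], check=True)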
official/projects/edgetpu/nlp/__init__.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/projects/edgetpu/nlp/configs/__init__.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/projects/edgetpu/nlp/configs/params.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/projects/edgetpu/nlp/experiments/downstream_tasks/mobilebert_baseline.yaml

@@ -13,7 +13,7 @@ task:
       num_attention_heads: 4
       intermediate_size: 512
       hidden_activation: relu
-      hidden_dropout_prob: 0.0
+      hidden_dropout_prob: 0.1
       attention_probs_dropout_prob: 0.1
       intra_bottleneck_size: 128
       initializer_range: 0.02
 ...
official/projects/edgetpu/nlp/experiments/downstream_tasks/mobilebert_edgetpu_xxs.yaml — new file (mode 100644)
# MobileBERT-EdgeTPU-XXS model.
task:
  model:
    encoder:
      type: mobilebert
      mobilebert:
        word_vocab_size: 30522
        word_embed_size: 128
        type_vocab_size: 2
        max_sequence_length: 512
        num_blocks: 6
        hidden_size: 512
        num_attention_heads: 4
        intermediate_size: 1024
        hidden_activation: relu
        hidden_dropout_prob: 0.1
        attention_probs_dropout_prob: 0.1
        intra_bottleneck_size: 128
        initializer_range: 0.02
        key_query_shared_bottleneck: true
        num_feedforward_networks: 2
        normalization_type: no_norm
        classifier_activation: false
official/projects/edgetpu/nlp/experiments/mobilebert_edgetpu_xxs.yaml — new file (mode 100644)
layer_wise_distillation:
  num_steps: 30000
  warmup_steps: 0
  initial_learning_rate: 1.5e-3
  end_learning_rate: 1.5e-3
  decay_steps: 30000
end_to_end_distillation:
  num_steps: 585000
  warmup_steps: 20000
  initial_learning_rate: 1.5e-3
  end_learning_rate: 1.5e-7
  decay_steps: 585000
  distill_ground_truth_ratio: 0.5
optimizer:
  optimizer:
    lamb:
      beta_1: 0.9
      beta_2: 0.999
      clipnorm: 1.0
      epsilon: 1.0e-06
      exclude_from_layer_adaptation: null
      exclude_from_weight_decay: ['LayerNorm', 'bias', 'norm']
      global_clipnorm: null
      name: LAMB
      weight_decay_rate: 0.01
    type: lamb
orbit_config:
  eval_interval: 1000
  eval_steps: -1
  mode: train
  steps_per_loop: 1000
  total_steps: 825000
runtime:
  distribution_strategy: 'tpu'
student_model:
  cls_heads: [{'activation': 'tanh', 'cls_token_idx': 0, 'dropout_rate': 0.0,
               'inner_dim': 512, 'name': 'next_sentence', 'num_classes': 2}]
  encoder:
    mobilebert:
      attention_probs_dropout_prob: 0.1
      classifier_activation: false
      hidden_activation: relu
      hidden_dropout_prob: 0.0
      hidden_size: 512
      initializer_range: 0.02
      input_mask_dtype: int32
      intermediate_size: 1024
      intra_bottleneck_size: 128
      key_query_shared_bottleneck: true
      max_sequence_length: 512
      normalization_type: no_norm
      num_attention_heads: 4
      num_blocks: 6
      num_feedforward_networks: 2
      type_vocab_size: 2
      use_bottleneck_attention: false
      word_embed_size: 128
      word_vocab_size: 30522
    type: mobilebert
  mlm_activation: relu
  mlm_initializer_range: 0.02
  mlm_output_weights_use_proj: true
teacher_model:
  cls_heads: []
  encoder:
    mobilebert:
      attention_probs_dropout_prob: 0.1
      classifier_activation: false
      hidden_activation: gelu
      hidden_dropout_prob: 0.1
      hidden_size: 512
      initializer_range: 0.02
      input_mask_dtype: int32
      intermediate_size: 4096
      intra_bottleneck_size: 1024
      key_query_shared_bottleneck: false
      max_sequence_length: 512
      normalization_type: layer_norm
      num_attention_heads: 4
      num_blocks: 24
      num_feedforward_networks: 1
      type_vocab_size: 2
      use_bottleneck_attention: false
      word_embed_size: 128
      word_vocab_size: 30522
    type: mobilebert
  mlm_activation: gelu
  mlm_initializer_range: 0.02
teacher_model_init_checkpoint: gs://**/uncased_L-24_H-1024_B-512_A-4_teacher/tf2_checkpoint/bert_model.ckpt-1
student_model_init_checkpoint: ''
train_datasest:
  block_length: 1
  cache: false
  cycle_length: null
  deterministic: null
  drop_remainder: true
  enable_tf_data_service: false
  global_batch_size: 2048
  input_path: gs://**/seq_512_mask_20/wikipedia.tfrecord*,gs://**/seq_512_mask_20/books.tfrecord*
  is_training: true
  max_predictions_per_seq: 20
  seq_length: 512
  sharding: true
  shuffle_buffer_size: 100
  tf_data_service_address: null
  tf_data_service_job_name: null
  tfds_as_supervised: false
  tfds_data_dir: ''
  tfds_name: ''
  tfds_skip_decoding_feature: ''
  tfds_split: ''
  use_next_sentence_label: true
  use_position_id: false
  use_v2_feature_names: false
eval_dataset:
  block_length: 1
  cache: false
  cycle_length: null
  deterministic: null
  drop_remainder: true
  enable_tf_data_service: false
  global_batch_size: 2048
  input_path: gs://**/seq_512_mask_20/wikipedia.tfrecord-00141-of-00500,gs://**/seq_512_mask_20/books.tfrecord-00141-of-00500
  is_training: false
  max_predictions_per_seq: 20
  seq_length: 512
  sharding: true
  shuffle_buffer_size: 100
  tf_data_service_address: null
  tf_data_service_job_name: null
  tfds_as_supervised: false
  tfds_data_dir: ''
  tfds_name: ''
  tfds_skip_decoding_feature: ''
  tfds_split: ''
  use_next_sentence_label: true
  use_position_id: false
  use_v2_feature_names: false
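For quick inspection, the experiment config above parses with PyYAML into plain dicts (a minimal sketch, assuming a local copy of the file; the trainer itself consumes it through the Model Garden config system). It shows the setup at a glance: a 6-block MobileBERT student distilled from a 24-block teacher, trained with LAMB.

import yaml

with open('mobilebert_edgetpu_xxs.yaml') as f:  # assumed local copy of the file above
  params = yaml.safe_load(f)

print(params['optimizer']['optimizer']['type'])                        # lamb
print(params['orbit_config']['total_steps'])                           # 825000
print(params['student_model']['encoder']['mobilebert']['num_blocks'])  # 6
print(params['teacher_model']['encoder']['mobilebert']['num_blocks'])  # 24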
official/projects/edgetpu/nlp/mobilebert_edgetpu_trainer.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/projects/edgetpu/nlp/mobilebert_edgetpu_trainer_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/projects/edgetpu/nlp/modeling/__init__.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/projects/edgetpu/nlp/modeling/edgetpu_layers.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -123,7 +123,7 @@ class EdgeTPUMultiHeadAttention(tf.keras.layers.MultiHeadAttention):
     """Builds multi-head dot-product attention computations.

     This function builds attributes necessary for `_compute_attention` to
-    costomize attention computation to replace the default dot-product
+    customize attention computation to replace the default dot-product
     attention.

     Args:
 ...
official/projects/edgetpu/nlp/modeling/edgetpu_layers_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/projects/edgetpu/nlp/modeling/encoder.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -161,7 +161,7 @@ class MobileBERTEncoder(tf.keras.Model):
     first_token = tf.squeeze(prev_output[:, 0:1, :], axis=1)

     if classifier_activation:
-      self._pooler_layer = tf.keras.layers.experimental.EinsumDense(
+      self._pooler_layer = tf.keras.layers.EinsumDense(
           'ab,bc->ac',
           output_shape=hidden_size,
           activation=tf.tanh,
 ...
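The change above swaps the experimental path for the stable tf.keras.layers.EinsumDense, which was promoted out of tf.keras.layers.experimental in newer TF releases. A minimal sketch of the pooler-style projection it builds, with an assumed hidden size:

import tensorflow as tf

hidden_size = 512  # assumed, matching the MobileBERT configs in this commit
pooler = tf.keras.layers.EinsumDense(
    'ab,bc->ac',              # a plain dense projection over the last axis
    output_shape=hidden_size,
    activation=tf.tanh)
first_token = tf.zeros([2, hidden_size])  # (batch, hidden)
print(pooler(first_token).shape)          # (2, 512)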
official/projects/edgetpu/nlp/modeling/model_builder.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
@@ -85,6 +85,7 @@ def build_bert_pretrainer(pretrainer_cfg: params.PretrainerModelParams,
       activation=tf_utils.get_activation(pretrainer_cfg.mlm_activation),
       initializer=tf.keras.initializers.TruncatedNormal(
           stddev=pretrainer_cfg.mlm_initializer_range),
+      output_weights_use_proj=pretrainer_cfg.mlm_output_weights_use_proj,
       name='cls/predictions')
   pretrainer = edgetpu_pretrainer.MobileBERTEdgeTPUPretrainer(
 ...
official/projects/edgetpu/nlp/modeling/model_builder_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/projects/edgetpu/nlp/modeling/pretrainer.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...