Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
0e096a84
Commit
0e096a84
authored
Jul 12, 2021
by
A. Unique TensorFlower
Browse files
Internal change
PiperOrigin-RevId: 384358378
parent
f2f4f2dc
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
2 additions
and
213 deletions
+2
-213
official/nlp/modeling/layers/text_layers.py
official/nlp/modeling/layers/text_layers.py
+2
-117
official/nlp/modeling/layers/text_layers_test.py
official/nlp/modeling/layers/text_layers_test.py
+0
-96
No files found.
official/nlp/modeling/layers/text_layers.py
View file @
0e096a84
...
...
@@ -33,121 +33,6 @@ def _check_if_tf_text_installed():
"'tensorflow-text-nightly'."
)
def _iterative_vectorized_fair_share(capacity: tf.Tensor,
                                     limit: Union[int, tf.Tensor]):
  """Iterative algorithm for the max-min fairness algorithm.

  Reference: https://en.wikipedia.org/wiki/Max-min_fairness

  The idea is: for each example with some number of segments and a limit on
  the total segment length allowed, we grant each segment a fair share of the
  limit. For example, if every segment has the same length, there is no work
  to do. If one segment has below-average length, its unused share is split
  among the others fairly. In this way, the longest segment will be the
  shortest among all potential capacity assignments.

  Args:
    capacity: A rank-2 int64 Tensor of shape #Segments x Batch, holding the
      requested length of each segment per example.
    limit: The largest permissible number of tokens in total across one
      example.

  Returns:
    A rank-2 Tensor with a new segment capacity assignment such that
    the total number of tokens in each example does not exceed the `limit`.
  """
  # Firstly, we calculate the lower bound of the capacity assignment:
  # every segment is entitled to at least limit // #segments (or its own
  # requested capacity, whichever is smaller).
  per_seg_limit = limit // capacity.shape[0]
  limit_mask = tf.ones(capacity.shape, dtype=tf.int64) * per_seg_limit
  lower_bound = tf.minimum(capacity, limit_mask)
  # This step grants in full any example whose leftover demand already
  # satisfies the remaining capacity limit (no further fairness needed there).
  remaining_cap_sum = limit - tf.math.reduce_sum(lower_bound, axis=0)
  remaining_cap_mat = capacity - lower_bound
  new_cap = lower_bound + remaining_cap_mat * tf.cast(
      tf.math.reduce_sum(remaining_cap_mat, axis=0) <= remaining_cap_sum,
      tf.int64)
  # Process iteratively. This step is O(#segments), see analysis below.
  while True:
    # Per-example budget still unassigned, and per-segment demand still unmet.
    remaining_limit = limit - tf.math.reduce_sum(new_cap, axis=0)
    remaining_cap = capacity - new_cap
    # 1 where a segment still wants more tokens, else 0.
    masked_remaining_slots = tf.cast(remaining_cap > 0, tf.int64)
    # Number of still-hungry segments per example (column).
    remaining_cap_col_slots = tf.reduce_sum(masked_remaining_slots, axis=0)
    # Zero out the budget of examples that have no hungry segments left.
    masked_remaining_limit = tf.cast(remaining_cap_col_slots > 0,
                                     tf.int64) * remaining_limit
    # Total remaining segment limit is different for each example.
    # The denominator adds 1 exactly when the slot count is 0, to make sure
    # 0 / 0 = 0 instead of a division error.
    per_seg_limit = masked_remaining_limit // (
        tf.cast(remaining_cap_col_slots <= 0, tf.int64) +
        remaining_cap_col_slots)
    # Note that on each step, at least one more segment becomes
    # fulfilled, or the loop finishes.
    # The idea is: if the remaining per-example limit > the smallest ask among
    # segments, the smallest segment's ask is fulfilled. Otherwise, all
    # remaining segments are truncated and the assignment is finished.
    if tf.math.reduce_sum(per_seg_limit) > 0:
      remaining_slots_mat = tf.cast(remaining_cap > 0, tf.int64)
      new_cap = new_cap + remaining_slots_mat * per_seg_limit
    else:
      # Leftover assignment of a limit that is smaller than #slots: hand one
      # extra token to the first `masked_remaining_limit` hungry segments
      # (cumsum picks them in segment order), then stop.
      new_remained_assignment_mask = tf.cast(
          (tf.cumsum(masked_remaining_slots, axis=0)
           <= masked_remaining_limit) & (masked_remaining_slots > 0),
          tf.int64)
      new_cap = new_cap + new_remained_assignment_mask
      break
  return new_cap
def round_robin_truncate_inputs(
    inputs: Union[tf.RaggedTensor, List[tf.RaggedTensor]],
    limit: Union[int, tf.Tensor],
) -> Union[tf.RaggedTensor, List[tf.RaggedTensor]]:
  """Truncates a list of batched segments to fit a per-example length limit.

  Available space is assigned one token at a time in a round-robin fashion
  to the inputs that still need some, until the limit is reached.
  (Or equivalently: the longest input is truncated by one token until the
  total length of inputs fits the limit.) Examples that fit the limit as
  passed in remain unchanged.

  Args:
    inputs: A list of rank-2 RaggedTensors. The i-th example is given by
      the i-th row in each list element, that is, `inputs[:][i, :]`.
    limit: The largest permissible number of tokens in total across one
      example.

  Returns:
    A list of rank-2 RaggedTensors at corresponding indices with the inputs,
    in which the rows of each RaggedTensor have been truncated such that
    the total number of tokens in each example does not exceed the `limit`.
  """
  # A bare RaggedTensor is routed through the singleton-list case, then
  # unwrapped so the caller gets back the same shape of argument it passed.
  if not isinstance(inputs, (list, tuple)):
    return round_robin_truncate_inputs([inputs], limit)[0]
  limit = tf.cast(limit, tf.int64)
  if any(rt.shape.rank != 2 for rt in inputs):
    raise ValueError("All inputs must have shape [batch_size, (items)]")

  num_segments = len(inputs)
  if num_segments == 1:
    # One segment simply gets the whole budget.
    return [_truncate_row_lengths(inputs[0], limit)]

  if num_segments == 2:
    first, second = inputs
    len_a = first.row_lengths()
    len_b = second.row_lengths()
    # Here's a brain-twister: This does round-robin assignment of quota
    # to both inputs until the limit is reached. Hint: consider separately
    # the cases of zero, one, or two inputs exceeding half the limit.
    half_down = limit // 2
    half_up = limit - half_down
    budget_a = tf.minimum(len_a, half_up + tf.nn.relu(half_down - len_b))
    budget_b = tf.minimum(len_b, half_down + tf.nn.relu(half_up - len_a))
    return [
        _truncate_row_lengths(first, budget_a),
        _truncate_row_lengths(second, budget_b),
    ]

  # Three or more segments: run the full max-min fair-share assignment.
  # (Kept separate from the 2-input case because the full algorithm is
  # more expensive.)
  row_lengths = tf.stack([rt.row_lengths() for rt in inputs])  # #Segments x B
  fair_lengths = _iterative_vectorized_fair_share(row_lengths, limit)
  return [
      _truncate_row_lengths(segment, fair_lengths[i])
      for i, segment in enumerate(inputs)
  ]
def
_truncate_row_lengths
(
ragged_tensor
:
tf
.
RaggedTensor
,
new_lengths
:
tf
.
Tensor
)
->
tf
.
RaggedTensor
:
"""Truncates the rows of `ragged_tensor` to the given row lengths."""
...
...
@@ -675,8 +560,8 @@ class BertPackInputs(tf.keras.layers.Layer):
# fall back to some ad-hoc truncation.
num_special_tokens
=
len
(
inputs
)
+
1
if
truncator
==
"round_robin"
:
trimmed_segments
=
r
ound
_r
obin
_truncate_inputs
(
inputs
,
seq_length
-
num_special_tokens
)
trimmed_segments
=
text
.
R
ound
R
obin
Trimmer
(
seq_length
-
num_special_tokens
)
.
trim
(
inputs
)
elif
truncator
==
"waterfall"
:
trimmed_segments
=
text
.
WaterfallTrimmer
(
seq_length
-
num_special_tokens
).
trim
(
inputs
)
...
...
official/nlp/modeling/layers/text_layers_test.py
View file @
0e096a84
...
...
@@ -24,102 +24,6 @@ from sentencepiece import SentencePieceTrainer
from
official.nlp.modeling.layers
import
text_layers
class RoundRobinTruncatorTest(tf.test.TestCase):
  """Tests round_robin_truncate_inputs with one, two, and three segments."""

  def _test_input(self, start, lengths):
    # Builds a ragged batch in which row j has `lengths[j]` tokens valued
    # start + 10*j, start + 10*j + 1, ... — every token is distinct, so the
    # surviving prefix after truncation can be spelled out exactly.
    return tf.ragged.constant(
        [[start + 10 * j + i for i in range(length)]
         for j, length in enumerate(lengths)],
        dtype=tf.int32)

  def test_single_segment(self):
    # Single segment.
    single_input = self._test_input(11, [4, 5, 6])
    expected_single_output = tf.ragged.constant(
        [[11, 12, 13, 14],
         [21, 22, 23, 24, 25],
         [31, 32, 33, 34, 35],  # Truncated.
        ])
    self.assertAllEqual(
        expected_single_output,
        text_layers.round_robin_truncate_inputs(single_input, limit=5))
    # Test wrapping in a singleton list.
    actual_single_list_output = text_layers.round_robin_truncate_inputs(
        [single_input], limit=5)
    self.assertIsInstance(actual_single_list_output, list)
    self.assertAllEqual(expected_single_output, actual_single_list_output[0])

  def test_two_segments(self):
    input_a = self._test_input(111, [1, 2, 2, 3, 4, 5])
    input_b = self._test_input(211, [1, 3, 4, 2, 2, 5])
    expected_a = tf.ragged.constant(
        [[111],
         [121, 122],
         [131, 132],
         [141, 142, 143],
         [151, 152, 153],  # Truncated.
         [161, 162, 163],  # Truncated.
        ])
    expected_b = tf.ragged.constant(
        [[211],
         [221, 222, 223],
         [231, 232, 233],  # Truncated.
         [241, 242],
         [251, 252],
         [261, 262],  # Truncated.
        ])
    actual_a, actual_b = text_layers.round_robin_truncate_inputs(
        [input_a, input_b], limit=5)
    self.assertAllEqual(expected_a, actual_a)
    self.assertAllEqual(expected_b, actual_b)

  def test_three_segments(self):
    input_a = self._test_input(111, [1, 2, 2, 3, 4, 5, 1])
    input_b = self._test_input(211, [1, 3, 4, 2, 2, 5, 8])
    input_c = self._test_input(311, [1, 3, 4, 2, 2, 5, 10])
    seg_limit = 8
    expected_a = tf.ragged.constant([
        [111],
        [121, 122],
        [131, 132],
        [141, 142, 143],
        [151, 152, 153, 154],
        [161, 162, 163],  # Truncated
        [171]
    ])
    expected_b = tf.ragged.constant([
        [211],
        [221, 222, 223],
        [231, 232, 233],  # Truncated
        [241, 242],
        [251, 252],
        [261, 262, 263],  # Truncated
        [271, 272, 273, 274]  # Truncated
    ])
    expected_c = tf.ragged.constant([
        [311],
        [321, 322, 323],
        [331, 332, 333],  # Truncated
        [341, 342],
        [351, 352],
        [361, 362],  # Truncated
        [371, 372, 373]  # Truncated
    ])
    actual_a, actual_b, actual_c = text_layers.round_robin_truncate_inputs(
        [input_a, input_b, input_c], limit=seg_limit)
    self.assertAllEqual(expected_a, actual_a)
    self.assertAllEqual(expected_b, actual_b)
    self.assertAllEqual(expected_c, actual_c)
    # Invariant check: each example's total token usage after truncation must
    # not exceed min(seg_limit, total tokens originally available).
    input_cap = tf.math.reduce_sum(
        tf.stack([rt.row_lengths() for rt in [input_a, input_b, input_c]]),
        axis=0)
    per_example_usage = tf.math.reduce_sum(
        tf.stack([rt.row_lengths() for rt in [actual_a, actual_b, actual_c]]),
        axis=0)
    self.assertTrue(
        all(per_example_usage <= tf.minimum(seg_limit, input_cap)))
# This test covers the in-process behavior of a BertTokenizer layer.
# For saving, restoring, and the restored behavior (incl. shape inference),
# see nlp/tools/export_tfhub_lib_test.py.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment