Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
5670119e
Commit
5670119e
authored
Jun 27, 2022
by
Rui Qian
Committed by
A. Unique TensorFlower
Jun 27, 2022
Browse files
Internal change
PiperOrigin-RevId: 457627771
parent
ae6f4977
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
105 additions
and
48 deletions
+105
-48
official/vision/ops/preprocess_ops_3d.py
official/vision/ops/preprocess_ops_3d.py
+95
-48
official/vision/ops/preprocess_ops_3d_test.py
official/vision/ops/preprocess_ops_3d_test.py
+10
-0
No files found.
official/vision/ops/preprocess_ops_3d.py
View file @
5670119e
...
...
@@ -18,8 +18,7 @@ from typing import Optional, Tuple
import
tensorflow
as
tf
def
_sample_or_pad_sequence_indices
(
sequence
:
tf
.
Tensor
,
num_steps
:
int
,
def
_sample_or_pad_sequence_indices
(
sequence
:
tf
.
Tensor
,
num_steps
:
int
,
stride
:
int
,
offset
:
tf
.
Tensor
)
->
tf
.
Tensor
:
"""Returns indices to take for sampling or padding sequences to fixed size."""
...
...
@@ -28,18 +27,16 @@ def _sample_or_pad_sequence_indices(sequence: tf.Tensor,
# Repeats sequence until num_steps are available in total.
max_length
=
num_steps
*
stride
+
offset
num_repeats
=
tf
.
math
.
floordiv
(
max_length
+
sequence_length
-
1
,
sequence_length
)
num_repeats
=
tf
.
math
.
floordiv
(
max_length
+
sequence_length
-
1
,
sequence_length
)
sel_idx
=
tf
.
tile
(
sel_idx
,
[
num_repeats
])
steps
=
tf
.
range
(
offset
,
offset
+
num_steps
*
stride
,
stride
)
return
tf
.
gather
(
sel_idx
,
steps
)
def
sample_linspace_sequence
(
sequence
:
tf
.
Tensor
,
num_windows
:
int
,
num_steps
:
int
,
stride
:
int
)
->
tf
.
Tensor
:
def
sample_linspace_sequence
(
sequence
:
tf
.
Tensor
,
num_windows
:
int
,
num_steps
:
int
,
stride
:
int
)
->
tf
.
Tensor
:
"""Samples `num_windows` segments from sequence with linearly spaced offsets.
The samples are concatenated in a single `tf.Tensor` in order to have the same
...
...
@@ -66,11 +63,12 @@ def sample_linspace_sequence(sequence: tf.Tensor,
all_indices
=
[]
for
i
in
range
(
num_windows
):
all_indices
.
append
(
_sample_or_pad_sequence_indices
(
sequence
=
sequence
,
num_steps
=
num_steps
,
stride
=
stride
,
offset
=
offsets
[
i
]))
all_indices
.
append
(
_sample_or_pad_sequence_indices
(
sequence
=
sequence
,
num_steps
=
num_steps
,
stride
=
stride
,
offset
=
offsets
[
i
]))
indices
=
tf
.
concat
(
all_indices
,
axis
=
0
)
indices
.
set_shape
((
num_windows
*
num_steps
,))
...
...
@@ -110,25 +108,76 @@ def sample_sequence(sequence: tf.Tensor,
sequence_length
>
(
num_steps
-
1
)
*
frame_stride
,
lambda
:
sequence_length
-
(
num_steps
-
1
)
*
frame_stride
,
lambda
:
sequence_length
)
offset
=
tf
.
random
.
uniform
(
(),
maxval
=
tf
.
cast
(
max_offset
,
dtype
=
tf
.
int32
),
dtype
=
tf
.
int32
,
seed
=
seed
)
offset
=
tf
.
random
.
uniform
((),
maxval
=
tf
.
cast
(
max_offset
,
dtype
=
tf
.
int32
),
dtype
=
tf
.
int32
,
seed
=
seed
)
else
:
offset
=
(
sequence_length
-
num_steps
*
stride
)
//
2
offset
=
tf
.
maximum
(
0
,
offset
)
indices
=
_sample_or_pad_sequence_indices
(
sequence
=
sequence
,
num_steps
=
num_steps
,
stride
=
stride
,
offset
=
offset
)
sequence
=
sequence
,
num_steps
=
num_steps
,
stride
=
stride
,
offset
=
offset
)
indices
.
set_shape
((
num_steps
,))
return
tf
.
gather
(
sequence
,
indices
)
def
sample_segment_sequence
(
sequence
:
tf
.
Tensor
,
num_frames
:
int
,
is_training
:
bool
,
seed
:
Optional
[
int
]
=
None
)
->
tf
.
Tensor
:
"""Samples a single segment of size `num_frames` from a given sequence.
This function follows the temporal segment network sampling style
(https://arxiv.org/abs/1608.00859). The video sequence would be divided into
`num_frames` non-overlapping segments with same length. If `is_training` is
`True`, we would randomly sampling one frame for each segment, and when
`is_training` is `False`, only the center frame of each segment is sampled.
Args:
sequence: Any tensor where the first dimension is timesteps.
num_frames: Number of frames to take.
is_training: A boolean indicating sampling in training or evaluation mode.
seed: A deterministic seed to use when sampling.
Returns:
A single `tf.Tensor` with first dimension `num_steps` with the sampled
segment.
"""
sequence_length
=
tf
.
shape
(
sequence
)[
0
]
sequence_length
=
tf
.
cast
(
sequence_length
,
tf
.
float32
)
segment_length
=
tf
.
cast
(
sequence_length
//
num_frames
,
tf
.
float32
)
segment_indices
=
tf
.
linspace
(
0.0
,
sequence_length
,
num_frames
+
1
)
segment_indices
=
tf
.
cast
(
segment_indices
,
tf
.
int32
)
if
is_training
:
segment_length
=
tf
.
cast
(
segment_length
,
tf
.
int32
)
# pylint:disable=g-long-lambda
segment_offsets
=
tf
.
cond
(
segment_length
==
0
,
lambda
:
tf
.
zeros
(
shape
=
(
num_frames
,),
dtype
=
tf
.
int32
),
lambda
:
tf
.
random
.
uniform
(
shape
=
(
num_frames
,),
minval
=
0
,
maxval
=
segment_length
,
dtype
=
tf
.
int32
,
seed
=
seed
))
# pylint:disable=g-long-lambda
else
:
# Only sampling central frame during inference for being deterministic.
segment_offsets
=
tf
.
ones
(
shape
=
(
num_frames
,),
dtype
=
tf
.
int32
)
*
tf
.
cast
(
segment_length
//
2
,
dtype
=
tf
.
int32
)
indices
=
segment_indices
[:
-
1
]
+
segment_offsets
indices
.
set_shape
((
num_frames
,))
return
tf
.
gather
(
sequence
,
indices
)
def
decode_jpeg
(
image_string
:
tf
.
Tensor
,
channels
:
int
=
0
)
->
tf
.
Tensor
:
"""Decodes JPEG raw bytes string into a RGB uint8 Tensor.
...
...
@@ -144,7 +193,9 @@ def decode_jpeg(image_string: tf.Tensor, channels: int = 0) -> tf.Tensor:
"""
return
tf
.
map_fn
(
lambda
x
:
tf
.
image
.
decode_jpeg
(
x
,
channels
=
channels
),
image_string
,
back_prop
=
False
,
dtype
=
tf
.
uint8
)
image_string
,
back_prop
=
False
,
dtype
=
tf
.
uint8
)
def
crop_image
(
frames
:
tf
.
Tensor
,
...
...
@@ -229,8 +280,7 @@ def crop_image(frames: tf.Tensor,
return
frames
def
resize_smallest
(
frames
:
tf
.
Tensor
,
min_resize
:
int
)
->
tf
.
Tensor
:
def
resize_smallest
(
frames
:
tf
.
Tensor
,
min_resize
:
int
)
->
tf
.
Tensor
:
"""Resizes frames so that min(`height`, `width`) is equal to `min_resize`.
This function will not do anything if the min(`height`, `width`) is already
...
...
@@ -255,18 +305,15 @@ def resize_smallest(frames: tf.Tensor,
frames_resized
=
tf
.
image
.
resize
(
frames
,
(
output_h
,
output_w
))
return
tf
.
cast
(
frames_resized
,
frames
.
dtype
)
should_resize
=
tf
.
math
.
logical_or
(
tf
.
not_equal
(
input_w
,
output_w
),
tf
.
not_equal
(
input_h
,
output_h
))
should_resize
=
tf
.
math
.
logical_or
(
tf
.
not_equal
(
input_w
,
output_w
),
tf
.
not_equal
(
input_h
,
output_h
))
frames
=
tf
.
cond
(
should_resize
,
resize_fn
,
lambda
:
frames
)
return
frames
def
random_crop_resize
(
frames
:
tf
.
Tensor
,
output_h
:
int
,
output_w
:
int
,
num_frames
:
int
,
num_channels
:
int
,
def
random_crop_resize
(
frames
:
tf
.
Tensor
,
output_h
:
int
,
output_w
:
int
,
num_frames
:
int
,
num_channels
:
int
,
aspect_ratio
:
Tuple
[
float
,
float
],
area_range
:
Tuple
[
float
,
float
])
->
tf
.
Tensor
:
"""First crops clip with jittering and then resizes to (output_h, output_w).
...
...
@@ -279,6 +326,7 @@ def random_crop_resize(frames: tf.Tensor,
num_channels: Number of channels of the clip.
aspect_ratio: Float tuple with the aspect range for cropping.
area_range: Float tuple with the area range for cropping.
Returns:
A Tensor of shape [timesteps, output_h, output_w, channels] of type
frames.dtype.
...
...
@@ -299,21 +347,16 @@ def random_crop_resize(frames: tf.Tensor,
bbox_begin
,
bbox_size
,
_
=
sample_distorted_bbox
offset_y
,
offset_x
,
_
=
tf
.
unstack
(
bbox_begin
)
target_height
,
target_width
,
_
=
tf
.
unstack
(
bbox_size
)
size
=
tf
.
convert_to_tensor
((
seq_len
,
target_height
,
target_width
,
channels
))
offset
=
tf
.
convert_to_tensor
((
0
,
offset_y
,
offset_x
,
0
))
size
=
tf
.
convert_to_tensor
((
seq_len
,
target_height
,
target_width
,
channels
))
offset
=
tf
.
convert_to_tensor
((
0
,
offset_y
,
offset_x
,
0
))
frames
=
tf
.
slice
(
frames
,
offset
,
size
)
frames
=
tf
.
cast
(
tf
.
image
.
resize
(
frames
,
(
output_h
,
output_w
)),
frames
.
dtype
)
frames
=
tf
.
cast
(
tf
.
image
.
resize
(
frames
,
(
output_h
,
output_w
)),
frames
.
dtype
)
frames
.
set_shape
((
num_frames
,
output_h
,
output_w
,
num_channels
))
return
frames
def
random_flip_left_right
(
frames
:
tf
.
Tensor
,
seed
:
Optional
[
int
]
=
None
)
->
tf
.
Tensor
:
def
random_flip_left_right
(
frames
:
tf
.
Tensor
,
seed
:
Optional
[
int
]
=
None
)
->
tf
.
Tensor
:
"""Flips all the frames with a probability of 50%.
Args:
...
...
@@ -324,12 +367,16 @@ def random_flip_left_right(
A Tensor of shape [timesteps, output_h, output_w, channels] eventually
flipped left right.
"""
is_flipped
=
tf
.
random
.
uniform
(
(),
minval
=
0
,
maxval
=
2
,
dtype
=
tf
.
int32
,
seed
=
seed
)
frames
=
tf
.
cond
(
tf
.
equal
(
is_flipped
,
1
),
true_fn
=
lambda
:
tf
.
image
.
flip_left_right
(
frames
),
false_fn
=
lambda
:
frames
)
is_flipped
=
tf
.
random
.
uniform
((),
minval
=
0
,
maxval
=
2
,
dtype
=
tf
.
int32
,
seed
=
seed
)
frames
=
tf
.
cond
(
tf
.
equal
(
is_flipped
,
1
),
true_fn
=
lambda
:
tf
.
image
.
flip_left_right
(
frames
),
false_fn
=
lambda
:
frames
)
return
frames
...
...
official/vision/ops/preprocess_ops_3d_test.py
View file @
5670119e
...
...
@@ -72,6 +72,16 @@ class ParserUtilsTest(tf.test.TestCase):
self
.
assertBetween
(
offset_3
,
0
,
99
)
self
.
assertAllEqual
(
sampled_seq_3
,
range
(
offset_3
,
offset_3
+
10
))
def
test_sample_segment_sequence
(
self
):
sequence
=
tf
.
range
(
100
)
sampled_seq_1
=
preprocess_ops_3d
.
sample_segment_sequence
(
sequence
,
10
,
False
)
sampled_seq_2
=
preprocess_ops_3d
.
sample_segment_sequence
(
sequence
,
10
,
True
)
self
.
assertAllEqual
(
sampled_seq_1
,
[
5
+
i
*
10
for
i
in
range
(
10
)])
for
idx
,
v
in
enumerate
(
sampled_seq_2
):
self
.
assertBetween
(
v
-
idx
*
10
,
0
,
10
)
def
test_decode_jpeg
(
self
):
# Create a random RGB JPEG image.
random_image
=
np
.
random
.
randint
(
0
,
256
,
size
=
(
263
,
320
,
3
),
dtype
=
np
.
uint8
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment