ModelZoo / ResNet50_tensorflow · Commits

Commit b0c03e17
Authored Apr 21, 2021 by Dan Kondratyuk
Committed by A. Unique TensorFlower, Apr 21, 2021
Add optional random stride to the video input pipeline.
PiperOrigin-RevId: 369747553
Parent: 9f3b338c
Changes: 4 files, with 46 additions and 2 deletions (+46 -2)

official/vision/beta/configs/video_classification.py     +1  -0
official/vision/beta/dataloaders/video_input.py           +20 -0
official/vision/beta/dataloaders/video_input_test.py      +22 -0
official/vision/beta/ops/preprocess_ops_3d.py             +3  -2
official/vision/beta/configs/video_classification.py

@@ -33,6 +33,7 @@ class DataConfig(cfg.DataConfig):
   split: str = 'train'
   feature_shape: Tuple[int, ...] = (64, 224, 224, 3)
   temporal_stride: int = 1
+  random_stride_range: int = 0
   num_test_clips: int = 1
   num_test_crops: int = 1
   num_classes: int = -1
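For reference, a minimal usage sketch (not part of the commit) of how the new field could be set on a training data config. It mirrors the unit test further down and assumes `exp_cfg` refers to this `video_classification` config module with its `kinetics600` helper:

# Hypothetical usage sketch: enable frame-rate augmentation for training.
from official.vision.beta.configs import video_classification as exp_cfg

params = exp_cfg.kinetics600(is_training=True)
params.temporal_stride = 2       # nominal frame-sampling stride
params.random_stride_range = 1   # jitter the stride around 2 during training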
official/vision/beta/dataloaders/video_input.py

@@ -33,6 +33,7 @@ def process_image(image: tf.Tensor,
                   is_training: bool = True,
                   num_frames: int = 32,
                   stride: int = 1,
+                  random_stride_range: int = 0,
                   num_test_clips: int = 1,
                   min_resize: int = 256,
                   crop_size: int = 224,
@@ -52,6 +53,11 @@ def process_image(image: tf.Tensor,
       and left right flip is used.
     num_frames: Number of frames per subclip.
     stride: Temporal stride to sample frames.
+    random_stride_range: An int indicating the min and max bounds to uniformly
+      sample different strides from the video. E.g., a value of 1 with stride=2
+      will uniformly sample a stride in {1, 2, 3} for each video in a batch.
+      Only enabled during training, for the purposes of frame-rate augmentation.
+      Defaults to 0, which disables random sampling.
     num_test_clips: Number of test clips (1 by default). If more than 1, this
       will sample multiple linearly spaced clips within each video at test time.
       If 1, then a single clip in the middle of the video is sampled. The clips
@@ -78,8 +84,20 @@ def process_image(image: tf.Tensor,
         '`num_test_clips` %d is ignored since `is_training` is `True`.',
         num_test_clips)
 
+  if random_stride_range < 0:
+    raise ValueError('Random stride range should be >= 0, got {}'.format(
+        random_stride_range))
+
   # Temporal sampler.
   if is_training:
+    if random_stride_range > 0:
+      # Uniformly sample different frame-rates
+      stride = tf.random.uniform(
+          [],
+          tf.maximum(stride - random_stride_range, 1),
+          stride + random_stride_range,
+          dtype=tf.int32)
+
     # Sample random clip.
     image = preprocess_ops_3d.sample_sequence(image, num_frames, True, stride,
                                               seed)
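As a side note, here is a tiny standalone sketch (illustrative values only, not from the repo) of the stride jitter performed by the block above; `tf.random.uniform` draws integers from `[minval, maxval)`, so the upper bound is exclusive:

import tensorflow as tf

stride = 2
random_stride_range = 1

# Draws an int32 stride from [max(stride - range, 1), stride + range).
sampled = tf.random.uniform(
    [],
    tf.maximum(stride - random_stride_range, 1),
    stride + random_stride_range,
    dtype=tf.int32)
print(int(sampled))  # 1 or 2 for these values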
@@ -219,6 +237,7 @@ class Parser(parser.Parser):
                label_key: str = LABEL_KEY):
     self._num_frames = input_params.feature_shape[0]
     self._stride = input_params.temporal_stride
+    self._random_stride_range = input_params.random_stride_range
     self._num_test_clips = input_params.num_test_clips
     self._min_resize = input_params.min_image_size
     self._crop_size = input_params.feature_shape[1]
@@ -248,6 +267,7 @@ class Parser(parser.Parser):
         is_training=True,
         num_frames=self._num_frames,
         stride=self._stride,
+        random_stride_range=self._random_stride_range,
         num_test_clips=self._num_test_clips,
         min_resize=self._min_resize,
         crop_size=self._crop_size,
official/vision/beta/dataloaders/video_input_test.py

@@ -135,6 +135,28 @@ class VideoAndLabelParserTest(tf.test.TestCase):
     self.assertAllEqual(label.shape, (600,))
     self.assertEqual(audio.shape, (15, 256))
 
+  def test_video_input_random_stride(self):
+    params = exp_cfg.kinetics600(is_training=True)
+    params.feature_shape = (2, 224, 224, 3)
+    params.min_image_size = 224
+
+    params.temporal_stride = 2
+    params.random_stride_range = 1
+
+    decoder = video_input.Decoder()
+    parser = video_input.Parser(params).parse_fn(params.is_training)
+
+    seq_example, label = fake_seq_example()
+
+    input_tensor = tf.constant(seq_example.SerializeToString())
+    decoded_tensors = decoder.decode(input_tensor)
+    output_tensor = parser(decoded_tensors)
+    image_features, label = output_tensor
+    image = image_features['image']
+
+    self.assertAllEqual(image.shape, (2, 224, 224, 3))
+    self.assertAllEqual(label.shape, (600,))
+
 
 if __name__ == '__main__':
   tf.test.main()
official/vision/beta/ops/preprocess_ops_3d.py

@@ -106,9 +106,10 @@ def sample_sequence(sequence: tf.Tensor,
   if random:
     sequence_length = tf.cast(sequence_length, tf.float32)
+    frame_stride = tf.cast(stride, tf.float32)
     max_offset = tf.cond(
-        sequence_length > (num_steps - 1) * stride,
-        lambda: sequence_length - (num_steps - 1) * stride,
+        sequence_length > (num_steps - 1) * frame_stride,
+        lambda: sequence_length - (num_steps - 1) * frame_stride,
         lambda: sequence_length)
     offset = tf.random.uniform(
         (),
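The cast above appears to be needed because, with the new frame-rate jitter, `stride` can arrive as a `tf.int32` tensor rather than a Python int, and TensorFlow does not implicitly convert between `int32` and `float32`. A standalone sketch of the issue (names and values are illustrative, not from the repo):

import tensorflow as tf

num_steps = 32
sequence_length = tf.cast(tf.constant(90), tf.float32)  # frames in the video
stride = tf.random.uniform([], 1, 3, dtype=tf.int32)    # tensor-valued stride

# Without a cast, mixing dtypes fails, e.g.:
#   sequence_length > (num_steps - 1) * stride   # float32 vs int32 -> error

frame_stride = tf.cast(stride, tf.float32)
max_offset = tf.cond(
    sequence_length > (num_steps - 1) * frame_stride,
    lambda: sequence_length - (num_steps - 1) * frame_stride,
    lambda: sequence_length)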