Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
e02da657
Commit
e02da657
authored
Jan 20, 2021
by
Yin Cui
Committed by
A. Unique TensorFlower
Jan 20, 2021
Browse files
Internal change
PiperOrigin-RevId: 352854870
parent
fa15ed1e
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
30 additions
and
19 deletions
+30
-19
official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml
...xperiments/video_classification/k400_slowonly8x8_tpu.yaml
+1
-1
official/vision/beta/dataloaders/video_input.py
official/vision/beta/dataloaders/video_input.py
+5
-6
official/vision/beta/projects/video_ssl/configs/video_ssl.py
official/vision/beta/projects/video_ssl/configs/video_ssl.py
+2
-0
official/vision/beta/projects/video_ssl/dataloaders/video_ssl_input.py
...on/beta/projects/video_ssl/dataloaders/video_ssl_input.py
+22
-12
No files found.
official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml
View file @
e02da657
# SlowOnly video classification on Kinetics-400. Expected performance to be updated.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 7
1.5
% top-1,
89.5
% top-5.
# Expected accuracy: 7
4.1
% top-1,
91.4
% top-5.
runtime
:
distribution_strategy
:
'
tpu'
mixed_precision_dtype
:
'
bfloat16'
...
...
official/vision/beta/dataloaders/video_input.py
View file @
e02da657
...
...
@@ -86,15 +86,14 @@ def _process_image(image: tf.Tensor,
# Decode JPEG string to tf.uint8.
image
=
preprocess_ops_3d
.
decode_jpeg
(
image
,
3
)
# Resize images (resize happens only if necessary to save compute).
image
=
preprocess_ops_3d
.
resize_smallest
(
image
,
min_resize
)
if
is_training
:
# Standard image data augmentation: random crop and random flip.
image
=
preprocess_ops_3d
.
crop_image
(
image
,
crop_size
,
crop_size
,
True
,
seed
)
# Standard image data augmentation: random
resized
crop and random flip.
image
=
preprocess_ops_3d
.
random_
crop_
re
size
(
image
,
crop_size
,
crop_size
,
num_frames
,
3
,
(
0.5
,
2
),
(
0.08
,
1
)
)
image
=
preprocess_ops_3d
.
random_flip_left_right
(
image
,
seed
)
else
:
# Resize images (resize happens only if necessary to save compute).
image
=
preprocess_ops_3d
.
resize_smallest
(
image
,
min_resize
)
# Crop of the frames.
image
=
preprocess_ops_3d
.
crop_image
(
image
,
crop_size
,
crop_size
,
False
,
num_crops
)
...
...
official/vision/beta/projects/video_ssl/configs/video_ssl.py
View file @
e02da657
...
...
@@ -57,6 +57,7 @@ def video_ssl_linear_eval_kinetics400() -> cfg.ExperimentConfig:
**
exp
.
task
.
validation_data
.
as_dict
())
exp
.
task
.
validation_data
.
min_image_size
=
256
exp
.
task
.
validation_data
.
num_test_clips
=
10
exp
.
task
.
validation_data
.
num_test_crops
=
3
return
exp
...
...
@@ -84,4 +85,5 @@ def video_ssl_linear_eval_kinetics600() -> cfg.ExperimentConfig:
exp
.
task
.
validation_data
.
temporal_stride
=
2
exp
.
task
.
validation_data
.
min_image_size
=
256
exp
.
task
.
validation_data
.
num_test_clips
=
10
exp
.
task
.
validation_data
.
num_test_crops
=
3
return
exp
official/vision/beta/projects/video_ssl/dataloaders/video_ssl_input.py
View file @
e02da657
...
...
@@ -36,8 +36,9 @@ def _process_image(image: tf.Tensor,
num_frames
:
int
=
32
,
stride
:
int
=
1
,
num_test_clips
:
int
=
1
,
min_resize
:
int
=
224
,
crop_size
:
int
=
200
,
min_resize
:
int
=
256
,
crop_size
:
int
=
224
,
num_crops
:
int
=
1
,
zero_centering_image
:
bool
=
False
,
seed
:
Optional
[
int
]
=
None
)
->
tf
.
Tensor
:
"""Processes a serialized image tensor.
...
...
@@ -57,6 +58,7 @@ def _process_image(image: tf.Tensor,
min_resize: Frames are resized so that min(height, width) is min_resize.
crop_size: Final size of the frame after cropping the resized frames. Both
height and width are the same.
num_crops: Number of crops to perform on the resized frames.
zero_centering_image: If True, frames are normalized to values in [-1, 1].
If False, values in [0, 1].
seed: A deterministic seed to use when sampling.
...
...
@@ -115,8 +117,8 @@ def _process_image(image: tf.Tensor,
# Resize images (resize happens only if necessary to save compute).
image
=
preprocess_ops_3d
.
resize_smallest
(
image
,
min_resize
)
# Three-crop of the frames.
image
=
preprocess_ops_3d
.
crop_image
(
image
,
min_resize
,
min_re
size
,
False
,
3
)
image
=
preprocess_ops_3d
.
crop_image
(
image
,
crop_size
,
crop_
size
,
False
,
num_crops
)
# Cast the frames in float32, normalizing according to zero_centering_image.
if
is_training
and
is_ssl
:
...
...
@@ -145,7 +147,8 @@ def _postprocess_image(image: tf.Tensor,
is_training
:
bool
=
True
,
is_ssl
:
bool
=
False
,
num_frames
:
int
=
32
,
num_test_clips
:
int
=
1
)
->
tf
.
Tensor
:
num_test_clips
:
int
=
1
,
num_test_crops
:
int
=
1
)
->
tf
.
Tensor
:
"""Processes a batched Tensor of frames.
The same parameters used in process should be used here.
...
...
@@ -160,20 +163,24 @@ def _postprocess_image(image: tf.Tensor,
will sample multiple linearly spaced clips within each video at test time.
If 1, then a single clip in the middle of the video is sampled. The clips
are aggreagated in the batch dimension.
num_test_crops: Number of test crops (1 by default). If more than 1, there
are multiple crops for each clip at test time. If 1, there is a single
central crop. The crops are aggreagated in the batch dimension.
Returns:
Processed frames. Tensor of shape
[batch * num_test_clips, num_frames, height, width, 3].
[batch * num_test_clips
* num_test_crops
, num_frames, height, width, 3].
"""
if
is_ssl
and
is_training
:
# In this case, two clips of self-supervised pre-training are merged
# together in batch dimenstion which will be 2 * batch.
image
=
tf
.
concat
(
tf
.
split
(
image
,
num_or_size_splits
=
2
,
axis
=
1
),
axis
=
0
)
if
num_test_clips
>
1
and
not
is_training
:
# In this case, multiple clips are merged together in batch dimenstion which
# will be B * num_test_clips.
image
=
tf
.
reshape
(
image
,
(
-
1
,
num_frames
)
+
image
.
shape
[
2
:])
num_views
=
num_test_clips
*
num_test_crops
if
num_views
>
1
and
not
is_training
:
# In this case, multiple views are merged together in batch dimenstion which
# will be batch * num_views.
image
=
tf
.
reshape
(
image
,
[
-
1
,
num_frames
]
+
image
.
shape
[
2
:].
as_list
())
return
image
...
...
@@ -247,7 +254,8 @@ class Parser(video_input.Parser):
stride
=
self
.
_stride
,
num_test_clips
=
self
.
_num_test_clips
,
min_resize
=
self
.
_min_resize
,
crop_size
=
self
.
_crop_size
)
crop_size
=
self
.
_crop_size
,
num_crops
=
self
.
_num_crops
)
image
=
tf
.
cast
(
image
,
dtype
=
self
.
_dtype
)
features
=
{
'image'
:
image
}
...
...
@@ -293,6 +301,7 @@ class PostBatchProcessor(object):
self
.
_is_ssl
=
input_params
.
is_ssl
self
.
_num_frames
=
input_params
.
feature_shape
[
0
]
self
.
_num_test_clips
=
input_params
.
num_test_clips
self
.
_num_test_crops
=
input_params
.
num_test_crops
def
__call__
(
self
,
features
:
Dict
[
str
,
tf
.
Tensor
],
label
:
tf
.
Tensor
)
->
Tuple
[
Dict
[
str
,
tf
.
Tensor
],
tf
.
Tensor
]:
...
...
@@ -304,6 +313,7 @@ class PostBatchProcessor(object):
is_training
=
self
.
_is_training
,
is_ssl
=
self
.
_is_ssl
,
num_frames
=
self
.
_num_frames
,
num_test_clips
=
self
.
_num_test_clips
)
num_test_clips
=
self
.
_num_test_clips
,
num_test_crops
=
self
.
_num_test_crops
)
return
features
,
label
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment