ModelZoo / ResNet50_tensorflow, commit e00e0e13
Authored Dec 03, 2018 by dreamdragon

    Merge remote-tracking branch 'upstream/master'

Parents: b915db4e, 402b561b

Showing 5 changed files with 858 additions and 4 deletions (+858, -4):
  research/struct2depth/reader.py                       +344    -0
  research/struct2depth/train.py                        +259    -0
  research/struct2depth/util.py                         +252    -0
  samples/core/get_started/custom_estimator.py            +2    -3
  tutorials/image/cifar10/cifar10_multi_gpu_train.py      +1    -1
research/struct2depth/reader.py (new file, mode 100644)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Reads data that is produced by dataset/gen_data.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import random

from absl import logging
import tensorflow as tf

import util

gfile = tf.gfile

QUEUE_SIZE = 2000
QUEUE_BUFFER = 3
# See nets.encoder_resnet as reference for below input-normalizing constants.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_SD = (0.229, 0.224, 0.225)
FLIP_RANDOM = 'random'  # Always perform random flipping.
FLIP_ALWAYS = 'always'  # Always flip image input, used for test augmentation.
FLIP_NONE = 'none'  # Always disables flipping.


class DataReader(object):
  """Reads stored sequences which are produced by dataset/gen_data.py."""

  def __init__(self, data_dir, batch_size, img_height, img_width, seq_length,
               num_scales, file_extension, random_scale_crop, flipping_mode,
               random_color, imagenet_norm, shuffle, input_file='train'):
    self.data_dir = data_dir
    self.batch_size = batch_size
    self.img_height = img_height
    self.img_width = img_width
    self.seq_length = seq_length
    self.num_scales = num_scales
    self.file_extension = file_extension
    self.random_scale_crop = random_scale_crop
    self.flipping_mode = flipping_mode
    self.random_color = random_color
    self.imagenet_norm = imagenet_norm
    self.shuffle = shuffle
    self.input_file = input_file

  def read_data(self):
    """Provides images and camera intrinsics."""
    with tf.name_scope('data_loading'):
      with tf.name_scope('enqueue_paths'):
        seed = random.randint(0, 2 ** 31 - 1)
        self.file_lists = self.compile_file_list(self.data_dir,
                                                 self.input_file)
        image_paths_queue = tf.train.string_input_producer(
            self.file_lists['image_file_list'], seed=seed,
            shuffle=self.shuffle,
            num_epochs=(1 if not self.shuffle else None))
        seg_paths_queue = tf.train.string_input_producer(
            self.file_lists['segment_file_list'], seed=seed,
            shuffle=self.shuffle,
            num_epochs=(1 if not self.shuffle else None))
        cam_paths_queue = tf.train.string_input_producer(
            self.file_lists['cam_file_list'], seed=seed,
            shuffle=self.shuffle,
            num_epochs=(1 if not self.shuffle else None))
        img_reader = tf.WholeFileReader()
        _, image_contents = img_reader.read(image_paths_queue)
        seg_reader = tf.WholeFileReader()
        _, seg_contents = seg_reader.read(seg_paths_queue)
        if self.file_extension == 'jpg':
          image_seq = tf.image.decode_jpeg(image_contents)
          seg_seq = tf.image.decode_jpeg(seg_contents, channels=3)
        elif self.file_extension == 'png':
          image_seq = tf.image.decode_png(image_contents, channels=3)
          seg_seq = tf.image.decode_png(seg_contents, channels=3)

      with tf.name_scope('load_intrinsics'):
        cam_reader = tf.TextLineReader()
        _, raw_cam_contents = cam_reader.read(cam_paths_queue)
        rec_def = []
        for _ in range(9):
          rec_def.append([1.0])
        raw_cam_vec = tf.decode_csv(raw_cam_contents,
                                    record_defaults=rec_def)
        raw_cam_vec = tf.stack(raw_cam_vec)
        intrinsics = tf.reshape(raw_cam_vec, [3, 3])

      with tf.name_scope('convert_image'):
        image_seq = self.preprocess_image(image_seq)  # Converts to float.

      if self.random_color:
        with tf.name_scope('image_augmentation'):
          image_seq = self.augment_image_colorspace(image_seq)

      image_stack = self.unpack_images(image_seq)
      seg_stack = self.unpack_images(seg_seq)

      if self.flipping_mode != FLIP_NONE:
        random_flipping = (self.flipping_mode == FLIP_RANDOM)
        with tf.name_scope('image_augmentation_flip'):
          image_stack, seg_stack, intrinsics = self.augment_images_flip(
              image_stack, seg_stack, intrinsics,
              randomized=random_flipping)

      if self.random_scale_crop:
        with tf.name_scope('image_augmentation_scale_crop'):
          image_stack, seg_stack, intrinsics = self.augment_images_scale_crop(
              image_stack, seg_stack, intrinsics, self.img_height,
              self.img_width)

      with tf.name_scope('multi_scale_intrinsics'):
        intrinsic_mat = self.get_multi_scale_intrinsics(intrinsics,
                                                        self.num_scales)
        intrinsic_mat.set_shape([self.num_scales, 3, 3])
        intrinsic_mat_inv = tf.matrix_inverse(intrinsic_mat)
        intrinsic_mat_inv.set_shape([self.num_scales, 3, 3])

      if self.imagenet_norm:
        im_mean = tf.tile(
            tf.constant(IMAGENET_MEAN), multiples=[self.seq_length])
        im_sd = tf.tile(
            tf.constant(IMAGENET_SD), multiples=[self.seq_length])
        image_stack_norm = (image_stack - im_mean) / im_sd
      else:
        image_stack_norm = image_stack

      with tf.name_scope('batching'):
        if self.shuffle:
          (image_stack, image_stack_norm, seg_stack, intrinsic_mat,
           intrinsic_mat_inv) = tf.train.shuffle_batch(
               [image_stack, image_stack_norm, seg_stack, intrinsic_mat,
                intrinsic_mat_inv],
               batch_size=self.batch_size,
               capacity=QUEUE_SIZE + QUEUE_BUFFER * self.batch_size,
               min_after_dequeue=QUEUE_SIZE)
        else:
          (image_stack, image_stack_norm, seg_stack, intrinsic_mat,
           intrinsic_mat_inv) = tf.train.batch(
               [image_stack, image_stack_norm, seg_stack, intrinsic_mat,
                intrinsic_mat_inv],
               batch_size=self.batch_size,
               num_threads=1,
               capacity=QUEUE_SIZE + QUEUE_BUFFER * self.batch_size)
        logging.info('image_stack: %s', util.info(image_stack))
    return (image_stack, image_stack_norm, seg_stack, intrinsic_mat,
            intrinsic_mat_inv)

  def unpack_images(self, image_seq):
    """[h, w * seq_length, 3] -> [h, w, 3 * seq_length]."""
    with tf.name_scope('unpack_images'):
      image_list = [
          image_seq[:, i * self.img_width:(i + 1) * self.img_width, :]
          for i in range(self.seq_length)
      ]
      image_stack = tf.concat(image_list, axis=2)
      image_stack.set_shape(
          [self.img_height, self.img_width, self.seq_length * 3])
    return image_stack

  @classmethod
  def preprocess_image(cls, image):
    # Convert from uint8 to float.
    return tf.image.convert_image_dtype(image, dtype=tf.float32)

  @classmethod
  def augment_image_colorspace(cls, image_stack):
    """Apply data augmentation to inputs."""
    image_stack_aug = image_stack
    # Randomly shift brightness.
    apply_brightness = tf.less(tf.random_uniform(
        shape=[], minval=0.0, maxval=1.0, dtype=tf.float32), 0.5)
    image_stack_aug = tf.cond(
        apply_brightness,
        lambda: tf.image.random_brightness(image_stack_aug, max_delta=0.1),
        lambda: image_stack_aug)
    # Randomly shift contrast.
    apply_contrast = tf.less(tf.random_uniform(
        shape=[], minval=0.0, maxval=1.0, dtype=tf.float32), 0.5)
    image_stack_aug = tf.cond(
        apply_contrast,
        lambda: tf.image.random_contrast(image_stack_aug, 0.85, 1.15),
        lambda: image_stack_aug)
    # Randomly change saturation.
    apply_saturation = tf.less(tf.random_uniform(
        shape=[], minval=0.0, maxval=1.0, dtype=tf.float32), 0.5)
    image_stack_aug = tf.cond(
        apply_saturation,
        lambda: tf.image.random_saturation(image_stack_aug, 0.85, 1.15),
        lambda: image_stack_aug)
    # Randomly change hue.
    apply_hue = tf.less(tf.random_uniform(
        shape=[], minval=0.0, maxval=1.0, dtype=tf.float32), 0.5)
    image_stack_aug = tf.cond(
        apply_hue,
        lambda: tf.image.random_hue(image_stack_aug, max_delta=0.1),
        lambda: image_stack_aug)
    image_stack_aug = tf.clip_by_value(image_stack_aug, 0, 1)
    return image_stack_aug

  @classmethod
  def augment_images_flip(cls, image_stack, seg_stack, intrinsics,
                          randomized=True):
    """Randomly flips the image horizontally."""

    def flip(cls, image_stack, seg_stack, intrinsics):
      _, in_w, _ = image_stack.get_shape().as_list()
      fx = intrinsics[0, 0]
      fy = intrinsics[1, 1]
      cx = in_w - intrinsics[0, 2]
      cy = intrinsics[1, 2]
      intrinsics = cls.make_intrinsics_matrix(fx, fy, cx, cy)
      return (tf.image.flip_left_right(image_stack),
              tf.image.flip_left_right(seg_stack), intrinsics)

    if randomized:
      prob = tf.random_uniform(shape=[], minval=0.0, maxval=1.0,
                               dtype=tf.float32)
      predicate = tf.less(prob, 0.5)
      return tf.cond(predicate,
                     lambda: flip(cls, image_stack, seg_stack, intrinsics),
                     lambda: (image_stack, seg_stack, intrinsics))
    else:
      return flip(cls, image_stack, seg_stack, intrinsics)

  @classmethod
  def augment_images_scale_crop(cls, im, seg, intrinsics, out_h, out_w):
    """Randomly scales and crops image."""

    def scale_randomly(im, seg, intrinsics):
      """Scales image and adjust intrinsics accordingly."""
      in_h, in_w, _ = im.get_shape().as_list()
      scaling = tf.random_uniform([2], 1, 1.15)
      x_scaling = scaling[0]
      y_scaling = scaling[1]
      out_h = tf.cast(in_h * y_scaling, dtype=tf.int32)
      out_w = tf.cast(in_w * x_scaling, dtype=tf.int32)
      # Add batch.
      im = tf.expand_dims(im, 0)
      im = tf.image.resize_area(im, [out_h, out_w])
      im = im[0]
      seg = tf.expand_dims(seg, 0)
      seg = tf.image.resize_area(seg, [out_h, out_w])
      seg = seg[0]
      fx = intrinsics[0, 0] * x_scaling
      fy = intrinsics[1, 1] * y_scaling
      cx = intrinsics[0, 2] * x_scaling
      cy = intrinsics[1, 2] * y_scaling
      intrinsics = cls.make_intrinsics_matrix(fx, fy, cx, cy)
      return im, seg, intrinsics

    # Random cropping
    def crop_randomly(im, seg, intrinsics, out_h, out_w):
      """Crops image and adjust intrinsics accordingly."""
      # batch_size, in_h, in_w, _ = im.get_shape().as_list()
      in_h, in_w, _ = tf.unstack(tf.shape(im))
      offset_y = tf.random_uniform([1], 0, in_h - out_h + 1,
                                   dtype=tf.int32)[0]
      offset_x = tf.random_uniform([1], 0, in_w - out_w + 1,
                                   dtype=tf.int32)[0]
      im = tf.image.crop_to_bounding_box(im, offset_y, offset_x, out_h, out_w)
      seg = tf.image.crop_to_bounding_box(seg, offset_y, offset_x, out_h,
                                          out_w)
      fx = intrinsics[0, 0]
      fy = intrinsics[1, 1]
      cx = intrinsics[0, 2] - tf.cast(offset_x, dtype=tf.float32)
      cy = intrinsics[1, 2] - tf.cast(offset_y, dtype=tf.float32)
      intrinsics = cls.make_intrinsics_matrix(fx, fy, cx, cy)
      return im, seg, intrinsics

    im, seg, intrinsics = scale_randomly(im, seg, intrinsics)
    im, seg, intrinsics = crop_randomly(im, seg, intrinsics, out_h, out_w)
    return im, seg, intrinsics

  def compile_file_list(self, data_dir, split, load_pose=False):
    """Creates a list of input files."""
    logging.info('data_dir: %s', data_dir)
    with gfile.Open(os.path.join(data_dir, '%s.txt' % split), 'r') as f:
      frames = f.readlines()
      frames = [k.rstrip() for k in frames]
    subfolders = [x.split(' ')[0] for x in frames]
    frame_ids = [x.split(' ')[1] for x in frames]
    image_file_list = [
        os.path.join(data_dir, subfolders[i],
                     frame_ids[i] + '.' + self.file_extension)
        for i in range(len(frames))
    ]
    segment_file_list = [
        os.path.join(data_dir, subfolders[i],
                     frame_ids[i] + '-fseg.' + self.file_extension)
        for i in range(len(frames))
    ]
    cam_file_list = [
        os.path.join(data_dir, subfolders[i], frame_ids[i] + '_cam.txt')
        for i in range(len(frames))
    ]
    file_lists = {}
    file_lists['image_file_list'] = image_file_list
    file_lists['segment_file_list'] = segment_file_list
    file_lists['cam_file_list'] = cam_file_list
    if load_pose:
      pose_file_list = [
          os.path.join(data_dir, subfolders[i], frame_ids[i] + '_pose.txt')
          for i in range(len(frames))
      ]
      file_lists['pose_file_list'] = pose_file_list
    self.steps_per_epoch = len(image_file_list) // self.batch_size
    return file_lists

  @classmethod
  def make_intrinsics_matrix(cls, fx, fy, cx, cy):
    r1 = tf.stack([fx, 0, cx])
    r2 = tf.stack([0, fy, cy])
    r3 = tf.constant([0., 0., 1.])
    intrinsics = tf.stack([r1, r2, r3])
    return intrinsics

  @classmethod
  def get_multi_scale_intrinsics(cls, intrinsics, num_scales):
    """Returns multiple intrinsic matrices for different scales."""
    intrinsics_multi_scale = []
    # Scale the intrinsics accordingly for each scale
    for s in range(num_scales):
      fx = intrinsics[0, 0] / (2 ** s)
      fy = intrinsics[1, 1] / (2 ** s)
      cx = intrinsics[0, 2] / (2 ** s)
      cy = intrinsics[1, 2] / (2 ** s)
      intrinsics_multi_scale.append(
          cls.make_intrinsics_matrix(fx, fy, cx, cy))
    intrinsics_multi_scale = tf.stack(intrinsics_multi_scale)
    return intrinsics_multi_scale
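
For orientation, here is a minimal sketch of how this reader is typically driven. It is not part of the commit; the data path and hyperparameter values are illustrative (they mirror the train.py defaults below), and it assumes a TF1 session environment plus a data_dir already laid out by dataset/gen_data.py:

  import tensorflow as tf
  from reader import DataReader

  r = DataReader(data_dir='/tmp/kitti_processed',  # hypothetical path
                 batch_size=4, img_height=128, img_width=416, seq_length=3,
                 num_scales=4, file_extension='png', random_scale_crop=False,
                 flipping_mode='random', random_color=True,
                 imagenet_norm=True, shuffle=True)
  (image_stack, image_stack_norm, seg_stack,
   intrinsic_mat, intrinsic_mat_inv) = r.read_data()

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # read_data() builds input queues, so queue runners must be started.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # One batch of images: shape [4, 128, 416, 9], i.e. 3 RGB frames
    # stacked along the channel axis by unpack_images().
    batch = sess.run(image_stack)
    coord.request_stop()
    coord.join(threads)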
research/struct2depth/train.py (new file, mode 100644)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Train the model. Please refer to README for example usage."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import os
import random
import time

from absl import app
from absl import flags
from absl import logging
import numpy as np
import tensorflow as tf

import model
import nets
import reader
import util

gfile = tf.gfile

MAX_TO_KEEP = 1000000  # Maximum number of checkpoints to keep.

flags.DEFINE_string('data_dir', None, 'Preprocessed data.')
flags.DEFINE_string('file_extension', 'png', 'Image data file extension.')
flags.DEFINE_float('learning_rate', 0.0002, 'Adam learning rate.')
flags.DEFINE_float('beta1', 0.9, 'Adam momentum.')
flags.DEFINE_float('reconstr_weight', 0.85,
                   'Frame reconstruction loss weight.')
flags.DEFINE_float('ssim_weight', 0.15, 'SSIM loss weight.')
flags.DEFINE_float('smooth_weight', 0.04, 'Smoothness loss weight.')
flags.DEFINE_float('icp_weight', 0.0, 'ICP loss weight.')
flags.DEFINE_float('size_constraint_weight', 0.0005, 'Weight of the object '
                   'size constraint loss. Use only when motion handling is '
                   'enabled.')
flags.DEFINE_integer('batch_size', 4, 'The size of a sample batch')
flags.DEFINE_integer('img_height', 128, 'Input frame height.')
flags.DEFINE_integer('img_width', 416, 'Input frame width.')
flags.DEFINE_integer('seq_length', 3, 'Number of frames in sequence.')
flags.DEFINE_enum('architecture', nets.RESNET, nets.ARCHITECTURES,
                  'Defines the architecture to use for the depth prediction '
                  'network. Defaults to ResNet-based encoder and accompanying '
                  'decoder.')
flags.DEFINE_boolean('imagenet_norm', True, 'Whether to normalize the input '
                     'images channel-wise so that they match the distribution '
                     'most ImageNet-models were trained on.')
flags.DEFINE_float('weight_reg', 0.05, 'The amount of weight regularization '
                   'to apply. This has no effect on the ResNet-based encoder '
                   'architecture.')
flags.DEFINE_boolean('exhaustive_mode', False, 'Whether to exhaustively warp '
                     'from any frame to any other instead of just considering '
                     'adjacent frames. Where necessary, multiple egomotion '
                     'estimates will be applied. Does not have an effect if '
                     'compute_minimum_loss is enabled.')
flags.DEFINE_boolean('random_scale_crop', False, 'Whether to apply random '
                     'image scaling and center cropping during training.')
flags.DEFINE_enum('flipping_mode', reader.FLIP_RANDOM,
                  [reader.FLIP_RANDOM, reader.FLIP_ALWAYS, reader.FLIP_NONE],
                  'Determines the image flipping mode: if random, performs '
                  'on-the-fly augmentation. Otherwise, flips the input images '
                  'always or never, respectively.')
flags.DEFINE_string('pretrained_ckpt', None, 'Path to checkpoint with '
                    'pretrained weights. Do not include .data* extension.')
flags.DEFINE_string('imagenet_ckpt', None, 'Initialize the weights according '
                    'to an ImageNet-pretrained checkpoint. Requires '
                    'architecture to be ResNet-18.')
flags.DEFINE_string('checkpoint_dir', None, 'Directory to save model '
                    'checkpoints.')
flags.DEFINE_integer('train_steps', 10000000, 'Number of training steps.')
flags.DEFINE_integer('summary_freq', 100, 'Save summaries every N steps.')
flags.DEFINE_bool('depth_upsampling', True, 'Whether to apply depth '
                  'upsampling of lower-scale representations before warping '
                  'to compute reconstruction loss on full-resolution image.')
flags.DEFINE_bool('depth_normalization', True, 'Whether to apply depth '
                  'normalization, that is, normalizing inverse depth '
                  'prediction maps by their mean to avoid degeneration '
                  'towards small values.')
flags.DEFINE_bool('compute_minimum_loss', True, 'Whether to take the '
                  'element-wise minimum of the reconstruction/SSIM error in '
                  'order to avoid overly penalizing dis-occlusion effects.')
flags.DEFINE_bool('use_skip', True, 'Whether to use skip connections in the '
                  'encoder-decoder architecture.')
flags.DEFINE_bool('equal_weighting', False, 'Whether to use equal weighting '
                  'of the smoothing loss term, regardless of resolution.')
flags.DEFINE_bool('joint_encoder', False, 'Whether to share parameters '
                  'between the depth and egomotion networks by using a joint '
                  'encoder architecture. The egomotion network is then '
                  'operating only on the hidden representation provided by '
                  'the joint encoder.')
flags.DEFINE_bool('handle_motion', True, 'Whether to try to handle motion by '
                  'using the provided segmentation masks.')
flags.DEFINE_string('master', 'local', 'Location of the session.')

FLAGS = flags.FLAGS
flags.mark_flag_as_required('data_dir')
flags.mark_flag_as_required('checkpoint_dir')


def main(_):
  # Fixed seed for repeatability
  seed = 8964
  tf.set_random_seed(seed)
  np.random.seed(seed)
  random.seed(seed)

  if FLAGS.handle_motion and FLAGS.joint_encoder:
    raise ValueError('Using a joint encoder is currently not supported when '
                     'modeling object motion.')
  if FLAGS.handle_motion and FLAGS.seq_length != 3:
    raise ValueError('The current motion model implementation only supports '
                     'using a sequence length of three.')
  if FLAGS.handle_motion and not FLAGS.compute_minimum_loss:
    raise ValueError('Computing the minimum photometric loss is required '
                     'when enabling object motion handling.')
  if FLAGS.size_constraint_weight > 0 and not FLAGS.handle_motion:
    raise ValueError('To enforce object size constraints, enable motion '
                     'handling.')
  if FLAGS.imagenet_ckpt and not FLAGS.imagenet_norm:
    logging.warn('When initializing with an ImageNet-pretrained model, it is '
                 'recommended to normalize the image inputs accordingly '
                 'using imagenet_norm.')
  if FLAGS.compute_minimum_loss and FLAGS.seq_length % 2 != 1:
    raise ValueError('Compute minimum loss requires using an odd number of '
                     'images in a sequence.')
  if FLAGS.architecture != nets.RESNET and FLAGS.imagenet_ckpt:
    raise ValueError('Can only load weights from pre-trained ImageNet model '
                     'when using ResNet-architecture.')
  if FLAGS.compute_minimum_loss and FLAGS.exhaustive_mode:
    raise ValueError('Exhaustive mode has no effect when '
                     'compute_minimum_loss is enabled.')
  if FLAGS.img_width % (2 ** 5) != 0 or FLAGS.img_height % (2 ** 5) != 0:
    logging.warn('Image size is not divisible by 2^5. For the architecture '
                 'employed, this could cause artefacts caused by resizing in '
                 'lower dimensions.')
  if FLAGS.icp_weight > 0.0:
    # TODO(casser): Change ICP interface to take matrix instead of vector.
    raise ValueError('ICP is currently not supported.')

  if not gfile.Exists(FLAGS.checkpoint_dir):
    gfile.MakeDirs(FLAGS.checkpoint_dir)

  train_model = model.Model(
      data_dir=FLAGS.data_dir,
      file_extension=FLAGS.file_extension,
      is_training=True,
      learning_rate=FLAGS.learning_rate,
      beta1=FLAGS.beta1,
      reconstr_weight=FLAGS.reconstr_weight,
      smooth_weight=FLAGS.smooth_weight,
      ssim_weight=FLAGS.ssim_weight,
      icp_weight=FLAGS.icp_weight,
      batch_size=FLAGS.batch_size,
      img_height=FLAGS.img_height,
      img_width=FLAGS.img_width,
      seq_length=FLAGS.seq_length,
      architecture=FLAGS.architecture,
      imagenet_norm=FLAGS.imagenet_norm,
      weight_reg=FLAGS.weight_reg,
      exhaustive_mode=FLAGS.exhaustive_mode,
      random_scale_crop=FLAGS.random_scale_crop,
      flipping_mode=FLAGS.flipping_mode,
      depth_upsampling=FLAGS.depth_upsampling,
      depth_normalization=FLAGS.depth_normalization,
      compute_minimum_loss=FLAGS.compute_minimum_loss,
      use_skip=FLAGS.use_skip,
      joint_encoder=FLAGS.joint_encoder,
      handle_motion=FLAGS.handle_motion,
      equal_weighting=FLAGS.equal_weighting,
      size_constraint_weight=FLAGS.size_constraint_weight)

  train(train_model, FLAGS.pretrained_ckpt, FLAGS.imagenet_ckpt,
        FLAGS.checkpoint_dir, FLAGS.train_steps, FLAGS.summary_freq)


def train(train_model, pretrained_ckpt, imagenet_ckpt, checkpoint_dir,
          train_steps, summary_freq):
  """Train model."""
  vars_to_restore = None
  if pretrained_ckpt is not None:
    vars_to_restore = util.get_vars_to_save_and_restore(pretrained_ckpt)
    ckpt_path = pretrained_ckpt
  elif imagenet_ckpt:
    vars_to_restore = util.get_imagenet_vars_to_restore(imagenet_ckpt)
    ckpt_path = imagenet_ckpt
  pretrain_restorer = tf.train.Saver(vars_to_restore)
  vars_to_save = util.get_vars_to_save_and_restore()
  vars_to_save[train_model.global_step.op.name] = train_model.global_step
  saver = tf.train.Saver(vars_to_save, max_to_keep=MAX_TO_KEEP)
  sv = tf.train.Supervisor(logdir=checkpoint_dir, save_summaries_secs=0,
                           saver=None)
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  with sv.managed_session(config=config) as sess:
    if pretrained_ckpt is not None or imagenet_ckpt:
      logging.info('Restoring pretrained weights from %s', ckpt_path)
      pretrain_restorer.restore(sess, ckpt_path)

    logging.info('Attempting to resume training from %s...', checkpoint_dir)
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    logging.info('Last checkpoint found: %s', checkpoint)
    if checkpoint:
      saver.restore(sess, checkpoint)

    logging.info('Training...')
    start_time = time.time()
    last_summary_time = time.time()
    steps_per_epoch = train_model.reader.steps_per_epoch
    step = 1
    while step <= train_steps:
      fetches = {
          'train': train_model.train_op,
          'global_step': train_model.global_step,
          'incr_global_step': train_model.incr_global_step
      }
      if step % summary_freq == 0:
        fetches['loss'] = train_model.total_loss
        fetches['summary'] = sv.summary_op

      results = sess.run(fetches)
      global_step = results['global_step']

      if step % summary_freq == 0:
        sv.summary_writer.add_summary(results['summary'], global_step)
        train_epoch = math.ceil(global_step / steps_per_epoch)
        train_step = global_step - (train_epoch - 1) * steps_per_epoch
        this_cycle = time.time() - last_summary_time
        last_summary_time += this_cycle
        logging.info(
            'Epoch: [%2d] [%5d/%5d] time: %4.2fs (%ds total) loss: %.3f',
            train_epoch, train_step, steps_per_epoch, this_cycle,
            time.time() - start_time, results['loss'])

      if step % steps_per_epoch == 0:
        logging.info('[*] Saving checkpoint to %s...', checkpoint_dir)
        saver.save(sess, os.path.join(checkpoint_dir, 'model'),
                   global_step=global_step)

      # Setting step to global_step allows for training for a total of
      # train_steps even if the program is restarted during training.
      step = global_step + 1


if __name__ == '__main__':
  app.run(main)
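
The module docstring defers example usage to the README; a representative invocation, using only the two required flags and hypothetical paths, would look something like:

  python train.py --data_dir=/tmp/struct2depth_data \
                  --checkpoint_dir=/tmp/struct2depth_ckpts

All other behavior (ResNet architecture, motion handling, minimum-loss computation) then follows the flag defaults defined above.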
research/struct2depth/util.py (new file, mode 100644)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Contains common utilities and functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import locale
import os
import re

from absl import logging
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import cv2

gfile = tf.gfile

CMAP_DEFAULT = 'plasma'
# Defines the cropping that is applied to the Cityscapes dataset with respect
# to the original raw input resolution.
CITYSCAPES_CROP = [256, 768, 192, 1856]


def crop_cityscapes(im, resize=None):
  ymin, ymax, xmin, xmax = CITYSCAPES_CROP
  im = im[ymin:ymax, xmin:xmax]
  if resize is not None:
    im = cv2.resize(im, resize)
  return im


def gray2rgb(im, cmap=CMAP_DEFAULT):
  cmap = plt.get_cmap(cmap)
  result_img = cmap(im.astype(np.float32))
  if result_img.shape[2] > 3:
    result_img = np.delete(result_img, 3, 2)
  return result_img


def load_image(img_file, resize=None, interpolation='linear'):
  """Load image from disk. Output value range: [0,1]."""
  im_data = np.fromstring(gfile.Open(img_file).read(), np.uint8)
  im = cv2.imdecode(im_data, cv2.IMREAD_COLOR)
  im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  if resize and resize != im.shape[:2]:
    ip = cv2.INTER_LINEAR if interpolation == 'linear' else cv2.INTER_NEAREST
    im = cv2.resize(im, resize, interpolation=ip)
  return np.array(im, dtype=np.float32) / 255.0


def save_image(img_file, im, file_extension):
  """Save image to disk. Expected input value range: [0,1]."""
  im = (im * 255.0).astype(np.uint8)
  with gfile.Open(img_file, 'w') as f:
    im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    _, im_data = cv2.imencode('.%s' % file_extension, im)
    f.write(im_data.tostring())


def normalize_depth_for_display(depth, pc=95, crop_percent=0, normalizer=None,
                                cmap=CMAP_DEFAULT):
  """Converts a depth map to an RGB image."""
  # Convert to disparity.
  disp = 1.0 / (depth + 1e-6)
  if normalizer is not None:
    disp /= normalizer
  else:
    disp /= (np.percentile(disp, pc) + 1e-6)
  disp = np.clip(disp, 0, 1)
  disp = gray2rgb(disp, cmap=cmap)
  keep_h = int(disp.shape[0] * (1 - crop_percent))
  disp = disp[:keep_h]
  return disp


def get_seq_start_end(target_index, seq_length, sample_every=1):
  """Returns absolute seq start and end indices for a given target frame."""
  half_offset = int((seq_length - 1) / 2) * sample_every
  end_index = target_index + half_offset
  start_index = end_index - (seq_length - 1) * sample_every
  return start_index, end_index


def get_seq_middle(seq_length):
  """Returns relative index for the middle frame in sequence."""
  half_offset = int((seq_length - 1) / 2)
  return seq_length - 1 - half_offset


def info(obj):
  """Return info on shape and dtype of a numpy array or TensorFlow tensor."""
  if obj is None:
    return 'None.'
  elif isinstance(obj, list):
    if obj:
      return 'List of %d... %s' % (len(obj), info(obj[0]))
    else:
      return 'Empty list.'
  elif isinstance(obj, tuple):
    if obj:
      return 'Tuple of %d... %s' % (len(obj), info(obj[0]))
    else:
      return 'Empty tuple.'
  else:
    if is_a_numpy_array(obj):
      return 'Array with shape: %s, dtype: %s' % (obj.shape, obj.dtype)
    else:
      return str(obj)


def is_a_numpy_array(obj):
  """Returns true if obj is a numpy array."""
  return type(obj).__module__ == np.__name__


def count_parameters(also_print=True):
  """Count the number of parameters in the model.

  Args:
    also_print: Boolean. If True also print the numbers.

  Returns:
    The total number of parameters.
  """
  total = 0
  if also_print:
    logging.info('Model Parameters:')
  for (_, v) in get_vars_to_save_and_restore().items():
    shape = v.get_shape()
    if also_print:
      logging.info('%s %s: %s', v.op.name, shape,
                   format_number(shape.num_elements()))
    total += shape.num_elements()
  if also_print:
    logging.info('Total: %s', format_number(total))
  return total


def get_vars_to_save_and_restore(ckpt=None):
  """Returns list of variables that should be saved/restored.

  Args:
    ckpt: Path to existing checkpoint. If present, returns only the subset of
        variables that exist in given checkpoint.

  Returns:
    List of all variables that need to be saved/restored.
  """
  model_vars = tf.trainable_variables()
  # Add batchnorm variables.
  bn_vars = [v for v in tf.global_variables()
             if 'moving_mean' in v.op.name or
             'moving_variance' in v.op.name or
             'mu' in v.op.name or
             'sigma' in v.op.name or
             'global_scale_var' in v.op.name]
  model_vars.extend(bn_vars)
  model_vars = sorted(model_vars, key=lambda x: x.op.name)
  mapping = {}
  if ckpt is not None:
    ckpt_var = tf.contrib.framework.list_variables(ckpt)
    ckpt_var_names = [name for (name, unused_shape) in ckpt_var]
    ckpt_var_shapes = [shape for (unused_name, shape) in ckpt_var]
    not_loaded = list(ckpt_var_names)
    for v in model_vars:
      if v.op.name not in ckpt_var_names:
        # For backward compatibility, try additional matching.
        v_additional_name = v.op.name.replace('egomotion_prediction/', '')
        if v_additional_name in ckpt_var_names:
          # Check if shapes match.
          ind = ckpt_var_names.index(v_additional_name)
          if ckpt_var_shapes[ind] == v.get_shape():
            mapping[v_additional_name] = v
            not_loaded.remove(v_additional_name)
            continue
          else:
            logging.warn('Shape mismatch, will not restore %s.', v.op.name)
        logging.warn('Did not find var %s in checkpoint: %s', v.op.name,
                     os.path.basename(ckpt))
      else:
        # Check if shapes match.
        ind = ckpt_var_names.index(v.op.name)
        if ckpt_var_shapes[ind] == v.get_shape():
          mapping[v.op.name] = v
          not_loaded.remove(v.op.name)
        else:
          logging.warn('Shape mismatch, will not restore %s.', v.op.name)
    if not_loaded:
      logging.warn('The following variables in the checkpoint were not '
                   'loaded:')
      for varname_not_loaded in not_loaded:
        logging.info('%s', varname_not_loaded)
  else:
    # just get model vars.
    for v in model_vars:
      mapping[v.op.name] = v
  return mapping


def get_imagenet_vars_to_restore(imagenet_ckpt):
  """Returns dict of variables to restore from ImageNet-checkpoint."""
  vars_to_restore_imagenet = {}
  ckpt_var_names = tf.contrib.framework.list_variables(imagenet_ckpt)
  ckpt_var_names = [name for (name, unused_shape) in ckpt_var_names]
  model_vars = tf.global_variables()
  for v in model_vars:
    if 'global_step' in v.op.name:
      continue
    mvname_noprefix = v.op.name.replace('depth_prediction/', '')
    mvname_noprefix = mvname_noprefix.replace('moving_mean', 'mu')
    mvname_noprefix = mvname_noprefix.replace('moving_variance', 'sigma')
    if mvname_noprefix in ckpt_var_names:
      vars_to_restore_imagenet[mvname_noprefix] = v
    else:
      logging.info('The following variable will not be restored from '
                   'pretrained ImageNet-checkpoint: %s', mvname_noprefix)
  return vars_to_restore_imagenet


def format_number(n):
  """Formats number with thousands commas."""
  locale.setlocale(locale.LC_ALL, 'en_US')
  return locale.format('%d', n, grouping=True)


def atoi(text):
  return int(text) if text.isdigit() else text


def natural_keys(text):
  return [atoi(c) for c in re.split(r'(\d+)', text)]


def read_text_lines(filepath):
  with tf.gfile.Open(filepath, 'r') as f:
    lines = f.readlines()
  lines = [l.rstrip() for l in lines]
  return lines
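
As a quick sanity check of the sequence-index helpers above (the arithmetic is plain Python; the import assumes the module's dependencies such as cv2 and matplotlib are installed):

  from util import get_seq_start_end, get_seq_middle

  # seq_length=3 around target frame 7: half_offset = int((3-1)/2) * 1 = 1,
  # so end = 7 + 1 = 8 and start = 8 - (3-1)*1 = 6.
  assert get_seq_start_end(target_index=7, seq_length=3) == (6, 8)

  # The middle of a 3-frame sequence is relative index 3 - 1 - 1 = 1.
  assert get_seq_middle(3) == 1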
samples/core/get_started/custom_estimator.py
@@ -27,9 +27,8 @@ parser.add_argument('--train_steps', default=1000, type=int,
                     help='number of training steps')
 
 def my_model(features, labels, mode, params):
-    """DNN with three hidden layers, and dropout of 0.1 probability."""
-    # Create three fully connected layers each layer having a dropout
-    # probability of 0.1.
+    """DNN with three hidden layers and learning_rate=0.1."""
+    # Create three fully connected layers.
     net = tf.feature_column.input_layer(features, params['feature_columns'])
     for units in params['hidden_units']:
         net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
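
The docstring and comment here were stale: my_model builds three plain dense ReLU layers and never applies dropout, so the "dropout of 0.1 probability" wording is removed in favor of text that matches the code.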
tutorials/image/cifar10/cifar10_multi_gpu_train.py
@@ -147,7 +147,7 @@ def train():
     # Calculate the learning rate schedule.
     num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
-                             FLAGS.batch_size)
+                             FLAGS.batch_size / FLAGS.num_gpus)
     decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)
 
     # Decay the learning rate exponentially based on the number of steps.
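
Why the one-line fix: in this tutorial each of the num_gpus towers dequeues its own batch_size images per step, so a training step actually consumes batch_size * num_gpus examples. With CIFAR-10's 50,000 training images, batch_size=128 and num_gpus=2, an epoch is about 195 steps rather than the roughly 391 the old formula gave; dividing by FLAGS.num_gpus shrinks decay_steps by the same factor, so the learning rate again decays on the intended epoch schedule.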