Commit 44fa1d37 authored by Alex Lee

Merge remote-tracking branch 'upstream/master'

parents d3628a74 6e367f67
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
mkdir -p data/stanford_building_parser_dataset
mkdir -p data/stanford_building_parser_dataset/mesh
cd data/stanford_building_parser_dataset_raw
# Untar the files and extract the meshes.
for t in "1" "3" "4" "5a" "5b" "6"; do
tar -xf area_"$t"_noXYZ.tar area_$t/3d/rgb_textures
mv area_$t/3d/rgb_textures ../stanford_building_parser_dataset/mesh/area$t
rmdir area_$t/3d
rmdir area_$t
done
cd ../../
# Preprocess meshes to remove the group and chunk information.
cd data/stanford_building_parser_dataset/
for t in "1" "3" "4" "5a" "5b" "6"; do
  obj_name=$(ls mesh/area$t/*.obj)
  cp "$obj_name" "$obj_name".bck
  grep -v '^g' "$obj_name".bck | grep -v '^o' > "$obj_name"
done
cd ../../
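# If the extraction above succeeded, the meshes should now live at (one
# directory per area, names taken from the loop above):
#   data/stanford_building_parser_dataset/mesh/area1/*.obj
#   ...
#   data/stanford_building_parser_dataset/mesh/area6/*.obj
# with a backup of each original obj kept alongside as *.obj.bck.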
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Test CMP models.
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+bench_test \
--logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_rgb_r2r+bench_test \
--logdir output/cmp.lmap_Msc.clip5.sbpd_rgb_r2r
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_ST+bench_test \
--logdir output/cmp.lmap_Msc.clip5.sbpd_d_ST
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_rgb_ST+bench_test \
--logdir output/cmp.lmap_Msc.clip5.sbpd_rgb_ST
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80+bench_test \
--logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80
# Test LSTM baseline models.
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_r2r+bench_test \
--logdir output/bl.v2.noclip.sbpd_d_r2r
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_rgb_r2r+bench_test \
--logdir output/bl.v2.noclip.sbpd_rgb_r2r
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_ST+bench_test \
--logdir output/bl.v2.noclip.sbpd_d_ST
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_rgb_ST+bench_test \
--logdir output/bl.v2.noclip.sbpd_rgb_ST
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_r2r_h0_64_80+bench_test \
--logdir output/bl.v2.noclip.sbpd_d_r2r_h0_64_80
# Visualize test trajectories in top view.
# CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \
# python scripts/script_plot_trajectory.py \
# --first_person --num_steps 40 \
# --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r \
# --imset test --alsologtostderr
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for processing depth images.
"""
import numpy as np
import src.rotation_utils as ru
import src.utils as utils
def get_camera_matrix(width, height, fov):
"""Returns a camera matrix from image size and fov."""
xc = (width-1.) / 2.
zc = (height-1.) / 2.
f = (width / 2.) / np.tan(np.deg2rad(fov / 2.))
camera_matrix = utils.Foo(xc=xc, zc=zc, f=f)
return camera_matrix
def get_point_cloud_from_z(Y, camera_matrix):
"""Projects the depth image Y into a 3D point cloud.
Inputs:
Y is ...xHxW
camera_matrix
Outputs:
X is positive going right
Y is positive into the image
Z is positive up in the image
XYZ is ...xHxWx3
"""
x, z = np.meshgrid(np.arange(Y.shape[-1]),
np.arange(Y.shape[-2]-1, -1, -1))
for i in range(Y.ndim-2):
x = np.expand_dims(x, axis=0)
z = np.expand_dims(z, axis=0)
X = (x-camera_matrix.xc) * Y / camera_matrix.f
Z = (z-camera_matrix.zc) * Y / camera_matrix.f
XYZ = np.concatenate((X[...,np.newaxis], Y[...,np.newaxis],
Z[...,np.newaxis]), axis=X.ndim)
return XYZ
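# A minimal usage sketch (hypothetical numbers): back-project a constant depth
# image from a 640x480 camera with a 60 degree field of view.
#   cm = get_camera_matrix(width=640, height=480, fov=60.)
#   Y = np.ones((480, 640), dtype=np.float32)  # everything 1 unit away
#   XYZ = get_point_cloud_from_z(Y, cm)        # 480 x 640 x 3 point cloud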
def make_geocentric(XYZ, sensor_height, camera_elevation_degree):
"""Transforms the point cloud into geocentric coordinate frame.
Input:
XYZ : ...x3
sensor_height : height of the sensor
camera_elevation_degree : camera elevation to rectify.
Output:
XYZ : ...x3
"""
R = ru.get_r_matrix([1.,0.,0.], angle=np.deg2rad(camera_elevation_degree))
XYZ = np.matmul(XYZ.reshape(-1,3), R.T).reshape(XYZ.shape)
XYZ[...,2] = XYZ[...,2] + sensor_height
return XYZ
def bin_points(XYZ_cms, map_size, z_bins, xy_resolution):
"""Bins points into xy-z bins
XYZ_cms is ... x H x W x3
Outputs is ... x map_size x map_size x (len(z_bins)+1)
"""
sh = XYZ_cms.shape
XYZ_cms = XYZ_cms.reshape([-1, sh[-3], sh[-2], sh[-1]])
n_z_bins = len(z_bins)+1
map_center = (map_size-1.)/2.
counts = []
isvalids = []
for XYZ_cm in XYZ_cms:
isnotnan = np.logical_not(np.isnan(XYZ_cm[:,:,0]))
X_bin = np.round(XYZ_cm[:,:,0] / xy_resolution + map_center).astype(np.int32)
Y_bin = np.round(XYZ_cm[:,:,1] / xy_resolution + map_center).astype(np.int32)
Z_bin = np.digitize(XYZ_cm[:,:,2], bins=z_bins).astype(np.int32)
isvalid = np.array([X_bin >= 0, X_bin < map_size, Y_bin >= 0, Y_bin < map_size,
Z_bin >= 0, Z_bin < n_z_bins, isnotnan])
isvalid = np.all(isvalid, axis=0)
ind = (Y_bin * map_size + X_bin) * n_z_bins + Z_bin
ind[np.logical_not(isvalid)] = 0
count = np.bincount(ind.ravel(), isvalid.ravel().astype(np.int32),
minlength=map_size*map_size*n_z_bins)
count = np.reshape(count, [map_size, map_size, n_z_bins])
counts.append(count)
isvalids.append(isvalid)
counts = np.array(counts).reshape(list(sh[:-3]) + [map_size, map_size, n_z_bins])
isvalids = np.array(isvalids).reshape(list(sh[:-3]) + [sh[-3], sh[-2], 1])
return counts, isvalids
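# A usage sketch (hypothetical numbers), assuming XYZ_cms comes from
# make_geocentric above and is in centimeters: bin into a 201x201 top-view map
# at 5cm resolution with a single height threshold at 50cm.
#   counts, isvalids = bin_points(XYZ_cms, map_size=201, z_bins=[50.],
#                                 xy_resolution=5.)
#   # counts is ... x 201 x 201 x 2: points below / above 50cm per xy cell.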
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for manipulating files.
"""
import os
import numpy as np
import PIL.Image
from tensorflow.python.platform import gfile
import cv2
exists = lambda path: gfile.Exists(path)
fopen = lambda path, mode: gfile.Open(path, mode)
makedirs = lambda path: gfile.MakeDirs(path)
listdir = lambda path: gfile.ListDir(path)
copyfile = lambda a, b, o: gfile.Copy(a,b,o)
def write_image(image_path, rgb):
ext = os.path.splitext(image_path)[1]
with gfile.GFile(image_path, 'w') as f:
img_str = cv2.imencode(ext, rgb[:,:,::-1])[1].tostring()
f.write(img_str)
def read_image(image_path, type='rgb'):
  with fopen(image_path, 'r') as f:
I = PIL.Image.open(f)
II = np.array(I)
if type == 'rgb':
II = II[:,:,:3]
return II
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various function to manipulate graphs for computing distances.
"""
import logging
import skimage.morphology
import numpy as np
import networkx as nx
import itertools
import graph_tool as gt
import graph_tool.topology
import graph_tool.generation
import src.utils as utils
# Compute shortest path from all nodes to or from all source nodes
def get_distance_node_list(gtG, source_nodes, direction, weights=None):
gtG_ = gt.Graph(gtG)
v = gtG_.add_vertex()
if weights is not None:
weights = gtG_.edge_properties[weights]
for s in source_nodes:
e = gtG_.add_edge(s, int(v))
if weights is not None:
weights[e] = 0.
if direction == 'to':
dist = gt.topology.shortest_distance(
gt.GraphView(gtG_, reversed=True), source=gtG_.vertex(int(v)),
target=None, weights=weights)
elif direction == 'from':
dist = gt.topology.shortest_distance(
gt.GraphView(gtG_, reversed=False), source=gtG_.vertex(int(v)),
target=None, weights=weights)
dist = np.array(dist.get_array())
dist = dist[:-1]
if weights is None:
dist = dist-1
return dist
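# The virtual vertex added above is connected to every source, so a single
# shortest_distance call yields the distance to the nearest source; dist[:-1]
# drops the virtual vertex, and the -1 removes the extra hop through it in the
# unweighted case (the connecting edges get weight 0 when weights are used).
# A usage sketch (hypothetical vertex ids), assuming g carries the 'wts' edge
# property set up below:
#   d = get_distance_node_list(g, source_nodes=[0, 5, 9], direction='to',
#                              weights='wts')
#   # d[i]: weighted distance from vertex i to the closest of vertices 0, 5, 9.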
# Functions for semantically labelling nodes in the traversal graph.
def generate_lattice(sz_x, sz_y):
"""Generates a lattice with sz_x vertices along x and sz_y vertices along y
direction Each of these vertices is step_size distance apart. Origin is at
(0,0). """
g = gt.generation.lattice([sz_x, sz_y])
x, y = np.meshgrid(np.arange(sz_x), np.arange(sz_y))
x = np.reshape(x, [-1,1]); y = np.reshape(y, [-1,1]);
nodes = np.concatenate((x,y), axis=1)
return g, nodes
def add_diagonal_edges(g, nodes, sz_x, sz_y, edge_len):
offset = [sz_x+1, sz_x-1]
for o in offset:
s = np.arange(nodes.shape[0]-o-1)
t = s + o
ind = np.all(np.abs(nodes[s,:] - nodes[t,:]) == np.array([[1,1]]), axis=1)
s = s[ind][:,np.newaxis]
t = t[ind][:,np.newaxis]
st = np.concatenate((s,t), axis=1)
for i in range(st.shape[0]):
e = g.add_edge(st[i,0], st[i,1], add_missing=False)
g.ep['wts'][e] = edge_len
def convert_traversible_to_graph(traversible, ff_cost=1., fo_cost=1.,
oo_cost=1., connectivity=4):
assert(connectivity == 4 or connectivity == 8)
sz_x = traversible.shape[1]
sz_y = traversible.shape[0]
g, nodes = generate_lattice(sz_x, sz_y)
# Assign costs.
edge_wts = g.new_edge_property('float')
g.edge_properties['wts'] = edge_wts
wts = np.ones(g.num_edges(), dtype=np.float32)
edge_wts.get_array()[:] = wts
if connectivity == 8:
add_diagonal_edges(g, nodes, sz_x, sz_y, np.sqrt(2.))
se = np.array([[int(e.source()), int(e.target())] for e in g.edges()])
s_xy = nodes[se[:,0]]
t_xy = nodes[se[:,1]]
s_t = np.ravel_multi_index((s_xy[:,1], s_xy[:,0]), traversible.shape)
t_t = np.ravel_multi_index((t_xy[:,1], t_xy[:,0]), traversible.shape)
s_t = traversible.ravel()[s_t]
t_t = traversible.ravel()[t_t]
wts = np.zeros(g.num_edges(), dtype=np.float32)
wts[np.logical_and(s_t == True, t_t == True)] = ff_cost
wts[np.logical_and(s_t == False, t_t == False)] = oo_cost
wts[np.logical_xor(s_t, t_t)] = fo_cost
edge_wts = g.edge_properties['wts']
for i, e in enumerate(g.edges()):
edge_wts[e] = edge_wts[e] * wts[i]
# d = edge_wts.get_array()*1.
# edge_wts.get_array()[:] = d*wts
return g, nodes
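# A usage sketch (hypothetical map): turn a free-space mask into a weighted
# 8-connected lattice where free-free edges cost 1 and any edge touching an
# obstacle costs 4.
#   traversible = np.zeros((100, 120), dtype=np.bool)
#   traversible[10:90, 10:110] = True
#   g, nodes = convert_traversible_to_graph(traversible, ff_cost=1., fo_cost=4.,
#                                           oo_cost=4., connectivity=8)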
def label_nodes_with_class(nodes_xyt, class_maps, pix):
"""
Returns:
class_maps__: one-hot class_map for each class.
node_class_label: one-hot class_map for each class, nodes_xyt.shape[0] x n_classes
"""
# Assign each pixel to a node.
selem = skimage.morphology.disk(pix)
class_maps_ = class_maps*1.
for i in range(class_maps.shape[2]):
class_maps_[:,:,i] = skimage.morphology.dilation(class_maps[:,:,i]*1, selem)
class_maps__ = np.argmax(class_maps_, axis=2)
class_maps__[np.max(class_maps_, axis=2) == 0] = -1
# For each node pick out the label from this class map.
x = np.round(nodes_xyt[:,[0]]).astype(np.int32)
y = np.round(nodes_xyt[:,[1]]).astype(np.int32)
ind = np.ravel_multi_index((y,x), class_maps__.shape)
node_class_label = class_maps__.ravel()[ind][:,0]
# Convert to one hot versions.
class_maps_one_hot = np.zeros(class_maps.shape, dtype=np.bool)
node_class_label_one_hot = np.zeros((node_class_label.shape[0], class_maps.shape[2]), dtype=np.bool)
for i in range(class_maps.shape[2]):
class_maps_one_hot[:,:,i] = class_maps__ == i
node_class_label_one_hot[:,i] = node_class_label == i
return class_maps_one_hot, node_class_label_one_hot
def label_nodes_with_class_geodesic(nodes_xyt, class_maps, pix, traversible,
ff_cost=1., fo_cost=1., oo_cost=1.,
connectivity=4):
"""Labels nodes in nodes_xyt with class labels using geodesic distance as
defined by traversible from class_maps.
Inputs:
nodes_xyt
class_maps: counts for each class.
pix: distance threshold to consider close enough to target.
traversible: binary map of whether traversible or not.
Output:
    labels: For each node in nodes_xyt returns a label of the class or -1 if
      unlabelled.
"""
g, nodes = convert_traversible_to_graph(traversible, ff_cost=ff_cost,
fo_cost=fo_cost, oo_cost=oo_cost,
connectivity=connectivity)
class_dist = np.zeros_like(class_maps*1.)
n_classes = class_maps.shape[2]
if False:
# Assign each pixel to a class based on number of points.
selem = skimage.morphology.disk(pix)
class_maps_ = class_maps*1.
class_maps__ = np.argmax(class_maps_, axis=2)
class_maps__[np.max(class_maps_, axis=2) == 0] = -1
# Label nodes with classes.
for i in range(n_classes):
# class_node_ids = np.where(class_maps__.ravel() == i)[0]
class_node_ids = np.where(class_maps[:,:,i].ravel() > 0)[0]
dist_i = get_distance_node_list(g, class_node_ids, 'to', weights='wts')
class_dist[:,:,i] = np.reshape(dist_i, class_dist[:,:,i].shape)
class_map_geodesic = (class_dist <= pix)
class_map_geodesic = np.reshape(class_map_geodesic, [-1, n_classes])
# For each node pick out the label from this class map.
x = np.round(nodes_xyt[:,[0]]).astype(np.int32)
y = np.round(nodes_xyt[:,[1]]).astype(np.int32)
ind = np.ravel_multi_index((y,x), class_dist[:,:,0].shape)
node_class_label = class_map_geodesic[ind[:,0],:]
class_map_geodesic = class_dist <= pix
return class_map_geodesic, node_class_label
def _get_next_nodes_undirected(n, sc, n_ori):
nodes_to_add = []
nodes_to_validate = []
(p, q, r) = n
nodes_to_add.append((n, (p, q, r), 0))
if n_ori == 4:
for _ in [1, 2, 3, 4]:
if _ == 1:
v = (p - sc, q, r)
elif _ == 2:
v = (p + sc, q, r)
elif _ == 3:
v = (p, q - sc, r)
elif _ == 4:
v = (p, q + sc, r)
nodes_to_validate.append((n, v, _))
return nodes_to_add, nodes_to_validate
def _get_next_nodes(n, sc, n_ori):
nodes_to_add = []
nodes_to_validate = []
(p, q, r) = n
for r_, a_ in zip([-1, 0, 1], [1, 0, 2]):
nodes_to_add.append((n, (p, q, np.mod(r+r_, n_ori)), a_))
if n_ori == 6:
if r == 0:
v = (p + sc, q, r)
elif r == 1:
v = (p + sc, q + sc, r)
elif r == 2:
v = (p, q + sc, r)
elif r == 3:
v = (p - sc, q, r)
elif r == 4:
v = (p - sc, q - sc, r)
elif r == 5:
v = (p, q - sc, r)
elif n_ori == 4:
if r == 0:
v = (p + sc, q, r)
elif r == 1:
v = (p, q + sc, r)
elif r == 2:
v = (p - sc, q, r)
elif r == 3:
v = (p, q - sc, r)
nodes_to_validate.append((n,v,3))
return nodes_to_add, nodes_to_validate
def generate_graph(valid_fn_vec=None, sc=1., n_ori=6,
starting_location=(0, 0, 0), vis=False, directed=True):
timer = utils.Timer()
timer.tic()
if directed: G = nx.DiGraph(directed=True)
else: G = nx.Graph()
G.add_node(starting_location)
new_nodes = G.nodes()
while len(new_nodes) != 0:
nodes_to_add = []
nodes_to_validate = []
for n in new_nodes:
if directed:
na, nv = _get_next_nodes(n, sc, n_ori)
else:
na, nv = _get_next_nodes_undirected(n, sc, n_ori)
nodes_to_add = nodes_to_add + na
if valid_fn_vec is not None:
nodes_to_validate = nodes_to_validate + nv
else:
        nodes_to_add = nodes_to_add + nv
# Validate nodes.
vs = [_[1] for _ in nodes_to_validate]
valids = valid_fn_vec(vs)
for nva, valid in zip(nodes_to_validate, valids):
if valid:
nodes_to_add.append(nva)
new_nodes = []
for n,v,a in nodes_to_add:
if not G.has_node(v):
new_nodes.append(v)
G.add_edge(n, v, action=a)
timer.toc(average=True, log_at=1, log_str='src.graph_utils.generate_graph')
return (G)
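# A usage sketch (hypothetical validity test): grow a directed navigation graph
# over a 10x10 free-space box with 4 orientations and unit steps.
#   valid_fn = lambda vs: [0 <= p < 10 and 0 <= q < 10 for (p, q, r) in vs]
#   G = generate_graph(valid_fn_vec=valid_fn, sc=1., n_ori=4,
#                      starting_location=(0, 0, 0), directed=True)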
def vis_G(G, ax, vertex_color='r', edge_color='b', r=None):
if edge_color is not None:
for e in G.edges():
XYT = zip(*e)
x = XYT[-3]
y = XYT[-2]
t = XYT[-1]
if r is None or t[0] == r:
ax.plot(x, y, edge_color)
if vertex_color is not None:
XYT = zip(*G.nodes())
x = XYT[-3]
y = XYT[-2]
t = XYT[-1]
ax.plot(x, y, vertex_color + '.')
def convert_to_graph_tool(G):
timer = utils.Timer()
timer.tic()
gtG = gt.Graph(directed=G.is_directed())
gtG.ep['action'] = gtG.new_edge_property('int')
nodes_list = G.nodes()
nodes_array = np.array(nodes_list)
nodes_id = np.zeros((nodes_array.shape[0],), dtype=np.int64)
for i in range(nodes_array.shape[0]):
v = gtG.add_vertex()
nodes_id[i] = int(v)
# d = {key: value for (key, value) in zip(nodes_list, nodes_id)}
d = dict(itertools.izip(nodes_list, nodes_id))
for src, dst, data in G.edges_iter(data=True):
e = gtG.add_edge(d[src], d[dst])
gtG.ep['action'][e] = data['action']
nodes_to_id = d
timer.toc(average=True, log_at=1, log_str='src.graph_utils.convert_to_graph_tool')
return gtG, nodes_array, nodes_to_id
def _rejection_sampling(rng, sampling_d, target_d, bins, hardness, M):
bin_ind = np.digitize(hardness, bins)-1
i = 0
ratio = target_d[bin_ind] / (M*sampling_d[bin_ind])
while i < ratio.size and rng.rand() > ratio[i]:
i = i+1
return i
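# The loop above is standard rejection sampling: candidate i is accepted with
# probability target_d[bin] / (M * sampling_d[bin]), so as long as M upper
# bounds target_d / sampling_d, the accepted hardness values follow target_d.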
def heuristic_fn_vec(n1, n2, n_ori, step_size):
# n1 is a vector and n2 is a single point.
dx = (n1[:,0] - n2[0,0])/step_size
dy = (n1[:,1] - n2[0,1])/step_size
dt = n1[:,2] - n2[0,2]
dt = np.mod(dt, n_ori)
dt = np.minimum(dt, n_ori-dt)
  if n_ori == 6:
    # dx and dy are vectors here; select between the two hex-grid distance
    # cases per element (a scalar `if` would break on arrays).
    d = np.where(dx*dy > 0, np.maximum(np.abs(dx), np.abs(dy)),
                 np.abs(dy-dx))
elif n_ori == 4:
d = np.abs(dx) + np.abs(dy)
return (d + dt).reshape((-1,1))
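# A worked example (hypothetical numbers) on a 4-orientation grid with unit
# steps:
#   heuristic_fn_vec(np.array([[3., 4., 1.]]), np.array([[0., 0., 0.]]),
#                    n_ori=4, step_size=1.)
# gives |3| + |4| = 7 grid steps plus min(1, 4-1) = 1 turn, i.e. [[8.]].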
def get_hardness_distribution(gtG, max_dist, min_dist, rng, trials, bins, nodes,
n_ori, step_size):
heuristic_fn = lambda node_ids, node_id: \
heuristic_fn_vec(nodes[node_ids, :], nodes[[node_id], :], n_ori, step_size)
num_nodes = gtG.num_vertices()
gt_dists = []; h_dists = [];
for i in range(trials):
end_node_id = rng.choice(num_nodes)
gt_dist = gt.topology.shortest_distance(gt.GraphView(gtG, reversed=True),
source=gtG.vertex(end_node_id),
target=None, max_dist=max_dist)
gt_dist = np.array(gt_dist.get_array())
ind = np.where(np.logical_and(gt_dist <= max_dist, gt_dist >= min_dist))[0]
gt_dist = gt_dist[ind]
h_dist = heuristic_fn(ind, end_node_id)[:,0]
gt_dists.append(gt_dist)
h_dists.append(h_dist)
gt_dists = np.concatenate(gt_dists)
h_dists = np.concatenate(h_dists)
hardness = 1. - h_dists*1./gt_dists
hist, _ = np.histogram(hardness, bins)
hist = hist.astype(np.float64)
hist = hist / np.sum(hist)
return hist
def rng_next_goal_rejection_sampling(start_node_ids, batch_size, gtG, rng,
max_dist, min_dist, max_dist_to_compute,
sampling_d, target_d,
nodes, n_ori, step_size, bins, M):
sample_start_nodes = start_node_ids is None
dists = []; pred_maps = []; end_node_ids = []; start_node_ids_ = [];
hardnesss = []; gt_dists = [];
num_nodes = gtG.num_vertices()
for i in range(batch_size):
done = False
while not done:
if sample_start_nodes:
start_node_id = rng.choice(num_nodes)
else:
start_node_id = start_node_ids[i]
gt_dist = gt.topology.shortest_distance(
gt.GraphView(gtG, reversed=False), source=start_node_id, target=None,
max_dist=max_dist)
gt_dist = np.array(gt_dist.get_array())
ind = np.where(np.logical_and(gt_dist <= max_dist, gt_dist >= min_dist))[0]
ind = rng.permutation(ind)
gt_dist = gt_dist[ind]*1.
h_dist = heuristic_fn_vec(nodes[ind, :], nodes[[start_node_id], :],
n_ori, step_size)[:,0]
hardness = 1. - h_dist / gt_dist
sampled_ind = _rejection_sampling(rng, sampling_d, target_d, bins,
hardness, M)
if sampled_ind < ind.size:
# print sampled_ind
end_node_id = ind[sampled_ind]
hardness = hardness[sampled_ind]
gt_dist = gt_dist[sampled_ind]
done = True
# Compute distance from end node to all nodes, to return.
dist, pred_map = gt.topology.shortest_distance(
gt.GraphView(gtG, reversed=True), source=end_node_id, target=None,
max_dist=max_dist_to_compute, pred_map=True)
dist = np.array(dist.get_array())
pred_map = np.array(pred_map.get_array())
hardnesss.append(hardness); dists.append(dist); pred_maps.append(pred_map);
start_node_ids_.append(start_node_id); end_node_ids.append(end_node_id);
gt_dists.append(gt_dist);
paths = None
return start_node_ids_, end_node_ids, dists, pred_maps, paths, hardnesss, gt_dists
def rng_next_goal(start_node_ids, batch_size, gtG, rng, max_dist,
max_dist_to_compute, node_room_ids, nodes=None,
compute_path=False, dists_from_start_node=None):
# Compute the distance field from the starting location, and then pick a
# destination in another room if possible otherwise anywhere outside this
# room.
dists = []; pred_maps = []; paths = []; end_node_ids = [];
for i in range(batch_size):
room_id = node_room_ids[start_node_ids[i]]
# Compute distances.
    if dists_from_start_node is None:
dist, pred_map = gt.topology.shortest_distance(
gt.GraphView(gtG, reversed=False), source=gtG.vertex(start_node_ids[i]),
target=None, max_dist=max_dist_to_compute, pred_map=True)
dist = np.array(dist.get_array())
else:
dist = dists_from_start_node[i]
# Randomly sample nodes which are within max_dist.
near_ids = dist <= max_dist
near_ids = near_ids[:, np.newaxis]
# Check to see if there is a non-negative node which is close enough.
non_same_room_ids = node_room_ids != room_id
non_hallway_ids = node_room_ids != -1
good1_ids = np.logical_and(near_ids, np.logical_and(non_same_room_ids, non_hallway_ids))
good2_ids = np.logical_and(near_ids, non_hallway_ids)
good3_ids = near_ids
if np.any(good1_ids):
end_node_id = rng.choice(np.where(good1_ids)[0])
elif np.any(good2_ids):
end_node_id = rng.choice(np.where(good2_ids)[0])
elif np.any(good3_ids):
end_node_id = rng.choice(np.where(good3_ids)[0])
else:
logging.error('Did not find any good nodes.')
# Compute distance to this new goal for doing distance queries.
dist, pred_map = gt.topology.shortest_distance(
gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id),
target=None, max_dist=max_dist_to_compute, pred_map=True)
dist = np.array(dist.get_array())
pred_map = np.array(pred_map.get_array())
dists.append(dist)
pred_maps.append(pred_map)
end_node_ids.append(end_node_id)
path = None
if compute_path:
path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
paths.append(path)
return start_node_ids, end_node_ids, dists, pred_maps, paths
def rng_room_to_room(batch_size, gtG, rng, max_dist, max_dist_to_compute,
node_room_ids, nodes=None, compute_path=False):
# Sample one of the rooms, compute the distance field. Pick a destination in
# another room if possible otherwise anywhere outside this room.
dists = []; pred_maps = []; paths = []; start_node_ids = []; end_node_ids = [];
room_ids = np.unique(node_room_ids[node_room_ids[:,0] >= 0, 0])
for i in range(batch_size):
room_id = rng.choice(room_ids)
end_node_id = rng.choice(np.where(node_room_ids[:,0] == room_id)[0])
end_node_ids.append(end_node_id)
# Compute distances.
dist, pred_map = gt.topology.shortest_distance(
gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id),
target=None, max_dist=max_dist_to_compute, pred_map=True)
dist = np.array(dist.get_array())
pred_map = np.array(pred_map.get_array())
dists.append(dist)
pred_maps.append(pred_map)
# Randomly sample nodes which are within max_dist.
near_ids = dist <= max_dist
near_ids = near_ids[:, np.newaxis]
# Check to see if there is a non-negative node which is close enough.
non_same_room_ids = node_room_ids != room_id
non_hallway_ids = node_room_ids != -1
good1_ids = np.logical_and(near_ids, np.logical_and(non_same_room_ids, non_hallway_ids))
good2_ids = np.logical_and(near_ids, non_hallway_ids)
good3_ids = near_ids
if np.any(good1_ids):
start_node_id = rng.choice(np.where(good1_ids)[0])
elif np.any(good2_ids):
start_node_id = rng.choice(np.where(good2_ids)[0])
elif np.any(good3_ids):
start_node_id = rng.choice(np.where(good3_ids)[0])
else:
logging.error('Did not find any good nodes.')
start_node_ids.append(start_node_id)
path = None
if compute_path:
path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
paths.append(path)
return start_node_ids, end_node_ids, dists, pred_maps, paths
def rng_target_dist_field(batch_size, gtG, rng, max_dist, max_dist_to_compute,
nodes=None, compute_path=False):
# Sample a single node, compute distance to all nodes less than max_dist,
# sample nodes which are a particular distance away.
dists = []; pred_maps = []; paths = []; start_node_ids = []
end_node_ids = rng.choice(gtG.num_vertices(), size=(batch_size,),
replace=False).tolist()
for i in range(batch_size):
dist, pred_map = gt.topology.shortest_distance(
gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_ids[i]),
target=None, max_dist=max_dist_to_compute, pred_map=True)
dist = np.array(dist.get_array())
pred_map = np.array(pred_map.get_array())
dists.append(dist)
pred_maps.append(pred_map)
    # Randomly sample nodes which are within max_dist.
near_ids = np.where(dist <= max_dist)[0]
start_node_id = rng.choice(near_ids, size=(1,), replace=False)[0]
start_node_ids.append(start_node_id)
path = None
if compute_path:
path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
paths.append(path)
return start_node_ids, end_node_ids, dists, pred_maps, paths
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various function to compute the ground truth map for training etc.
"""
import copy
import logging
import skimage.morphology
import numpy as np
import scipy.ndimage
import matplotlib.pyplot as plt
import PIL.Image
import src.utils as utils
import cv2
def _get_xy_bounding_box(vertex, padding):
"""Returns the xy bounding box of the environment."""
min_ = np.floor(np.min(vertex[:, :2], axis=0) - padding).astype(np.int)
max_ = np.ceil(np.max(vertex[:, :2], axis=0) + padding).astype(np.int)
return min_, max_
def _project_to_map(map, vertex, wt=None, ignore_points_outside_map=False):
"""Projects points to map, returns how many points are present at each
location."""
num_points = np.zeros((map.size[1], map.size[0]))
vertex_ = vertex[:, :2] - map.origin
vertex_ = np.round(vertex_ / map.resolution).astype(np.int)
if ignore_points_outside_map:
good_ind = np.all(np.array([vertex_[:,1] >= 0, vertex_[:,1] < map.size[1],
vertex_[:,0] >= 0, vertex_[:,0] < map.size[0]]),
axis=0)
vertex_ = vertex_[good_ind, :]
if wt is not None:
wt = wt[good_ind, :]
if wt is None:
np.add.at(num_points, (vertex_[:, 1], vertex_[:, 0]), 1)
else:
assert(wt.shape[0] == vertex.shape[0]), \
'number of weights should be same as vertices.'
np.add.at(num_points, (vertex_[:, 1], vertex_[:, 0]), wt)
return num_points
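# A usage sketch (hypothetical map): count points per 5cm cell on an 80x100
# grid whose origin is at (0, 0).
#   map = utils.Foo(origin=np.array([0, 0]), size=np.array([100, 80]),
#                   resolution=5.)
#   pts = np.array([[12., 7., 0.], [13., 8., 0.]])
#   num = _project_to_map(map, pts)  # 80x100; one count each at (y=1, x=2)
#                                    # and (y=2, x=3)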
def make_map(padding, resolution, vertex=None, sc=1.):
"""Returns a map structure."""
min_, max_ = _get_xy_bounding_box(vertex*sc, padding=padding)
sz = np.ceil((max_ - min_ + 1) / resolution).astype(np.int32)
max_ = min_ + sz * resolution - 1
map = utils.Foo(origin=min_, size=sz, max=max_, resolution=resolution,
padding=padding)
return map
def _fill_holes(img, thresh):
"""Fills holes less than thresh area (assumes 4 connectivity when computing
hole area."""
l, n = scipy.ndimage.label(np.logical_not(img))
img_ = img == True
cnts = np.bincount(l.reshape(-1))
for i, cnt in enumerate(cnts):
if cnt < thresh:
l[l == i] = -1
img_[l == -1] = True
return img_
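# A usage sketch: a free-space mask with a 3x3 hole; any hole smaller than
# thresh pixels gets filled in.
#   img = np.ones((50, 50), dtype=np.bool)
#   img[10:13, 10:13] = False
#   filled = _fill_holes(img, thresh=20)  # the 9-pixel hole is now True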
def compute_traversibility(map, robot_base, robot_height, robot_radius,
valid_min, valid_max, num_point_threshold, shapess,
sc=100., n_samples_per_face=200):
"""Returns a bit map with pixels that are traversible or not as long as the
robot center is inside this volume we are good colisions can be detected by
doing a line search on things, or walking from current location to final
location in the bitmap, or doing bwlabel on the traversibility map."""
tt = utils.Timer()
tt.tic()
num_obstcale_points = np.zeros((map.size[1], map.size[0]))
num_points = np.zeros((map.size[1], map.size[0]))
for i, shapes in enumerate(shapess):
for j in range(shapes.get_number_of_meshes()):
p, face_areas, face_idx = shapes.sample_points_on_face_of_shape(
j, n_samples_per_face, sc)
wt = face_areas[face_idx]/n_samples_per_face
ind = np.all(np.concatenate(
(p[:, [2]] > robot_base,
p[:, [2]] < robot_base + robot_height), axis=1),axis=1)
num_obstcale_points += _project_to_map(map, p[ind, :], wt[ind])
ind = np.all(np.concatenate(
(p[:, [2]] > valid_min,
p[:, [2]] < valid_max), axis=1),axis=1)
num_points += _project_to_map(map, p[ind, :], wt[ind])
selem = skimage.morphology.disk(robot_radius / map.resolution)
obstacle_free = skimage.morphology.binary_dilation(
_fill_holes(num_obstcale_points > num_point_threshold, 20), selem) != True
valid_space = _fill_holes(num_points > num_point_threshold, 20)
traversible = np.all(np.concatenate((obstacle_free[...,np.newaxis],
valid_space[...,np.newaxis]), axis=2),
axis=2)
# plt.imshow(np.concatenate((obstacle_free, valid_space, traversible), axis=1))
# plt.show()
map_out = copy.deepcopy(map)
map_out.num_obstcale_points = num_obstcale_points
map_out.num_points = num_points
map_out.traversible = traversible
map_out.obstacle_free = obstacle_free
map_out.valid_space = valid_space
tt.toc(log_at=1, log_str='src.map_utils.compute_traversibility: ')
return map_out
def resize_maps(map, map_scales, resize_method):
scaled_maps = []
for i, sc in enumerate(map_scales):
    if resize_method == 'antialiasing':
      # Resize with OpenCV first to compute the output size, then redo the
      # resize with PIL, which supports antialiasing.
map_ = cv2.resize(map*1, None, None, fx=sc, fy=sc, interpolation=cv2.INTER_LINEAR)
w = map_.shape[1]; h = map_.shape[0]
map_img = PIL.Image.fromarray((map*255).astype(np.uint8))
map__img = map_img.resize((w,h), PIL.Image.ANTIALIAS)
map_ = np.asarray(map__img).astype(np.float32)
map_ = map_/255.
map_ = np.minimum(map_, 1.0)
map_ = np.maximum(map_, 0.0)
elif resize_method == 'linear_noantialiasing':
map_ = cv2.resize(map*1, None, None, fx=sc, fy=sc, interpolation=cv2.INTER_LINEAR)
else:
logging.error('Unknown resizing method')
scaled_maps.append(map_)
return scaled_maps
def pick_largest_cc(traversible):
out = scipy.ndimage.label(traversible)[0]
cnt = np.bincount(out.reshape(-1))[1:]
return out == np.argmax(cnt) + 1
def get_graph_origin_loc(rng, traversible):
"""Erode the traversibility mask so that we get points in the bulk of the
graph, and not end up with a situation where the graph is localized in the
corner of a cramped room. Output Locs is in the coordinate frame of the
map."""
aa = pick_largest_cc(skimage.morphology.binary_erosion(traversible == True,
selem=np.ones((15,15))))
y, x = np.where(aa > 0)
ind = rng.choice(y.size)
locs = np.array([x[ind], y[ind]])
locs = locs + rng.rand(*(locs.shape)) - 0.5
return locs
def generate_egocentric_maps(scaled_maps, map_scales, map_crop_sizes, loc,
x_axis, y_axis, theta):
maps = []
for i, (map_, sc, map_crop_size) in enumerate(zip(scaled_maps, map_scales, map_crop_sizes)):
maps_i = np.array(get_map_to_predict(loc*sc, x_axis, y_axis, map_,
map_crop_size,
interpolation=cv2.INTER_LINEAR)[0])
maps_i[np.isnan(maps_i)] = 0
maps.append(maps_i)
return maps
def generate_goal_images(map_scales, map_crop_sizes, n_ori, goal_dist,
goal_theta, rel_goal_orientation):
goal_dist = goal_dist[:,0]
goal_theta = goal_theta[:,0]
rel_goal_orientation = rel_goal_orientation[:,0]
goals = [];
# Generate the map images.
for i, (sc, map_crop_size) in enumerate(zip(map_scales, map_crop_sizes)):
goal_i = np.zeros((goal_dist.shape[0], map_crop_size, map_crop_size, n_ori),
dtype=np.float32)
x = goal_dist*np.cos(goal_theta)*sc + (map_crop_size-1.)/2.
y = goal_dist*np.sin(goal_theta)*sc + (map_crop_size-1.)/2.
for j in range(goal_dist.shape[0]):
gc = rel_goal_orientation[j]
x0 = np.floor(x[j]).astype(np.int32); x1 = x0 + 1;
y0 = np.floor(y[j]).astype(np.int32); y1 = y0 + 1;
if x0 >= 0 and x0 <= map_crop_size-1:
if y0 >= 0 and y0 <= map_crop_size-1:
goal_i[j, y0, x0, gc] = (x1-x[j])*(y1-y[j])
if y1 >= 0 and y1 <= map_crop_size-1:
goal_i[j, y1, x0, gc] = (x1-x[j])*(y[j]-y0)
if x1 >= 0 and x1 <= map_crop_size-1:
if y0 >= 0 and y0 <= map_crop_size-1:
goal_i[j, y0, x1, gc] = (x[j]-x0)*(y1-y[j])
if y1 >= 0 and y1 <= map_crop_size-1:
goal_i[j, y1, x1, gc] = (x[j]-x0)*(y[j]-y0)
goals.append(goal_i)
return goals
def get_map_to_predict(src_locs, src_x_axiss, src_y_axiss, map, map_size,
interpolation=cv2.INTER_LINEAR):
fss = []
valids = []
center = (map_size-1.0)/2.0
dst_theta = np.pi/2.0
dst_loc = np.array([center, center])
dst_x_axis = np.array([np.cos(dst_theta), np.sin(dst_theta)])
dst_y_axis = np.array([np.cos(dst_theta+np.pi/2), np.sin(dst_theta+np.pi/2)])
def compute_points(center, x_axis, y_axis):
points = np.zeros((3,2),dtype=np.float32)
points[0,:] = center
points[1,:] = center + x_axis
points[2,:] = center + y_axis
return points
dst_points = compute_points(dst_loc, dst_x_axis, dst_y_axis)
for i in range(src_locs.shape[0]):
src_loc = src_locs[i,:]
src_x_axis = src_x_axiss[i,:]
src_y_axis = src_y_axiss[i,:]
src_points = compute_points(src_loc, src_x_axis, src_y_axis)
M = cv2.getAffineTransform(src_points, dst_points)
fs = cv2.warpAffine(map, M, (map_size, map_size), None, flags=interpolation,
borderValue=np.NaN)
valid = np.invert(np.isnan(fs))
valids.append(valid)
fss.append(fs)
return fss, valids
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for generating and applying rotation matrices.
"""
import numpy as np
ANGLE_EPS = 0.001
def normalize(v):
return v / np.linalg.norm(v)
def get_r_matrix(ax_, angle):
ax = normalize(ax_)
if np.abs(angle) > ANGLE_EPS:
S_hat = np.array(
[[0.0, -ax[2], ax[1]], [ax[2], 0.0, -ax[0]], [-ax[1], ax[0], 0.0]],
dtype=np.float32)
R = np.eye(3) + np.sin(angle)*S_hat + \
(1-np.cos(angle))*(np.linalg.matrix_power(S_hat, 2))
else:
R = np.eye(3)
return R
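# get_r_matrix is the Rodrigues formula R = I + sin(a)*S + (1-cos(a))*S^2 for
# the skew-symmetric matrix S of the (normalized) axis. A quick check:
#   R = get_r_matrix([0., 0., 1.], angle=np.pi/2)
#   np.allclose(R.dot([1., 0., 0.]), [0., 1., 0.])  # True: x-hat maps to y-hat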
def r_between(v_from_, v_to_):
v_from = normalize(v_from_)
v_to = normalize(v_to_)
ax = normalize(np.cross(v_from, v_to))
angle = np.arccos(np.dot(v_from, v_to))
return get_r_matrix(ax, angle)
def rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to):
inputs = [up_from, lookat_from, up_to, lookat_to]
for i in range(4):
inputs[i] = normalize(np.array(inputs[i]).reshape((-1,)))
up_from, lookat_from, up_to, lookat_to = inputs
r1 = r_between(lookat_from, lookat_to)
new_x = np.dot(r1, np.array([1, 0, 0]).reshape((-1, 1))).reshape((-1))
to_x = normalize(np.cross(lookat_to, up_to))
angle = np.arccos(np.dot(new_x, to_x))
if angle > ANGLE_EPS:
if angle < np.pi - ANGLE_EPS:
ax = normalize(np.cross(new_x, to_x))
flip = np.dot(lookat_to, ax)
if flip > 0:
r2 = get_r_matrix(lookat_to, angle)
elif flip < 0:
r2 = get_r_matrix(lookat_to, -1. * angle)
else:
# Angle of rotation is too close to 180 degrees, direction of rotation
# does not matter.
r2 = get_r_matrix(lookat_to, angle)
else:
r2 = np.eye(3)
return np.dot(r2, r1)
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Generaly Utilities.
"""
import numpy as np, cPickle, os, time
import src.file_utils as fu
import logging
class Timer():
def __init__(self):
self.calls = 0.
self.start_time = 0.
self.time_per_call = 0.
self.total_time = 0.
self.last_log_time = 0.
def tic(self):
self.start_time = time.time()
def toc(self, average=True, log_at=-1, log_str='', type='calls'):
if self.start_time == 0:
logging.error('Timer not started by calling tic().')
t = time.time()
diff = time.time() - self.start_time
self.total_time += diff
self.calls += 1.
self.time_per_call = self.total_time/self.calls
if type == 'calls' and log_at > 0 and np.mod(self.calls, log_at) == 0:
_ = []
logging.info('%s: %f seconds.', log_str, self.time_per_call)
elif type == 'time' and log_at > 0 and t - self.last_log_time >= log_at:
_ = []
logging.info('%s: %f seconds.', log_str, self.time_per_call)
self.last_log_time = t
if average:
return self.time_per_call
else:
return diff
class Foo(object):
def __init__(self, **kwargs):
self.__dict__.update(kwargs)
def __str__(self):
str_ = ''
for v in vars(self).keys():
a = getattr(self, v)
if True: #isinstance(v, object):
str__ = str(a)
str__ = str__.replace('\n', '\n ')
else:
str__ = str(a)
str_ += '{:s}: {:s}'.format(v, str__)
str_ += '\n'
return str_
def dict_equal(dict1, dict2):
assert(set(dict1.keys()) == set(dict2.keys())), "Sets of keys between 2 dictionaries are different."
for k in dict1.keys():
    assert(type(dict1[k]) == type(dict2[k])), "Type of key '{:s}' is different.".format(k)
    if type(dict1[k]) == np.ndarray:
      assert(dict1[k].dtype == dict2[k].dtype), "Numpy type of key '{:s}' is different.".format(k)
      assert(np.allclose(dict1[k], dict2[k])), "Values for key '{:s}' do not match.".format(k)
    else:
      assert(dict1[k] == dict2[k]), "Values for key '{:s}' do not match.".format(k)
return True
def subplot(plt, Y_X, sz_y_sz_x = (10, 10)):
Y,X = Y_X
sz_y, sz_x = sz_y_sz_x
plt.rcParams['figure.figsize'] = (X*sz_x, Y*sz_y)
fig, axes = plt.subplots(Y, X)
plt.subplots_adjust(wspace=0.1, hspace=0.1)
return fig, axes
def tic_toc_print(interval, string):
global tic_toc_print_time_old
if 'tic_toc_print_time_old' not in globals():
tic_toc_print_time_old = time.time()
print string
else:
new_time = time.time()
if new_time - tic_toc_print_time_old > interval:
tic_toc_print_time_old = new_time;
print string
def mkdir_if_missing(output_dir):
if not fu.exists(output_dir):
fu.makedirs(output_dir)
def save_variables(pickle_file_name, var, info, overwrite = False):
  if fu.exists(pickle_file_name) and overwrite == False:
    raise Exception('{:s} exists and overwrite is false.'.format(pickle_file_name))
# Construct the dictionary
assert(type(var) == list); assert(type(info) == list);
d = {}
for i in xrange(len(var)):
d[info[i]] = var[i]
with fu.fopen(pickle_file_name, 'w') as f:
cPickle.dump(d, f, cPickle.HIGHEST_PROTOCOL)
def load_variables(pickle_file_name):
if fu.exists(pickle_file_name):
with fu.fopen(pickle_file_name, 'r') as f:
d = cPickle.load(f)
return d
else:
    raise Exception('{:s} does not exist.'.format(pickle_file_name))
def voc_ap(rec, prec):
rec = rec.reshape((-1,1))
prec = prec.reshape((-1,1))
z = np.zeros((1,1))
o = np.ones((1,1))
mrec = np.vstack((z, rec, o))
mpre = np.vstack((z, prec, z))
for i in range(len(mpre)-2, -1, -1):
mpre[i] = max(mpre[i], mpre[i+1])
I = np.where(mrec[1:] != mrec[0:-1])[0]+1;
ap = 0;
for i in I:
ap = ap + (mrec[i] - mrec[i-1])*mpre[i];
return ap
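# A worked example (hypothetical values): two detections at recall 0.5, with
# precision 1.0 then 0.5. The padding makes recall span 0..1, the backward pass
# makes precision monotonically non-increasing, and the sum picks up
# (0.5 - 0)*1.0 + (1.0 - 0.5)*0.0, so:
#   voc_ap(np.array([0.5, 0.5]), np.array([1.0, 0.5]))  # -> 0.5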
def tight_imshow_figure(plt, figsize=None):
fig = plt.figure(figsize=figsize)
ax = plt.Axes(fig, [0,0,1,1])
ax.set_axis_off()
fig.add_axes(ax)
return fig, ax
def calc_pr(gt, out, wt=None):
if wt is None:
wt = np.ones((gt.size,1))
gt = gt.astype(np.float64).reshape((-1,1))
wt = wt.astype(np.float64).reshape((-1,1))
out = out.astype(np.float64).reshape((-1,1))
gt = gt*wt
tog = np.concatenate([gt, wt, out], axis=1)*1.
ind = np.argsort(tog[:,2], axis=0)[::-1]
tog = tog[ind,:]
cumsumsortgt = np.cumsum(tog[:,0])
cumsumsortwt = np.cumsum(tog[:,1])
prec = cumsumsortgt / cumsumsortwt
rec = cumsumsortgt / np.sum(tog[:,0])
ap = voc_ap(rec, prec)
return ap, rec, prec
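# A usage sketch: a perfect ranking of one positive over one negative gives
# average precision 1.
#   ap, rec, prec = calc_pr(np.array([1, 0]), np.array([0.9, 0.1]))
#   # ap == 1.0, rec ends at 1.0, prec starts at 1.0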
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Code for setting up the network for CMP.
Sets up the mapper and the planner.
"""
import sys, os, numpy as np
import matplotlib.pyplot as plt
import copy
import argparse, pprint
import time
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim import arg_scope
import logging
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
from src import utils
import src.file_utils as fu
import tfcode.nav_utils as nu
import tfcode.cmp_utils as cu
import tfcode.cmp_summary as cmp_s
from tfcode import tf_utils
value_iteration_network = cu.value_iteration_network
rotate_preds = cu.rotate_preds
deconv = cu.deconv
get_visual_frustum = cu.get_visual_frustum
fr_v2 = cu.fr_v2
setup_train_step_kwargs = nu.default_train_step_kwargs
compute_losses_multi_or = nu.compute_losses_multi_or
get_repr_from_image = nu.get_repr_from_image
_save_d_at_t = nu.save_d_at_t
_save_all = nu.save_all
_eval_ap = nu.eval_ap
_eval_dist = nu.eval_dist
_plot_trajectories = nu.plot_trajectories
_vis_readout_maps = cmp_s._vis_readout_maps
_vis = cmp_s._vis
_summary_vis = cmp_s._summary_vis
_summary_readout_maps = cmp_s._summary_readout_maps
_add_summaries = cmp_s._add_summaries
def _inputs(problem):
# Set up inputs.
with tf.name_scope('inputs'):
inputs = []
inputs.append(('orig_maps', tf.float32,
(problem.batch_size, 1, None, None, 1)))
inputs.append(('goal_loc', tf.float32,
(problem.batch_size, problem.num_goals, 2)))
common_input_data, _ = tf_utils.setup_inputs(inputs)
inputs = []
if problem.input_type == 'vision':
# Multiple images from an array of cameras.
inputs.append(('imgs', tf.float32,
(problem.batch_size, None, len(problem.aux_delta_thetas)+1,
problem.img_height, problem.img_width,
problem.img_channels)))
elif problem.input_type == 'analytical_counts':
for i in range(len(problem.map_crop_sizes)):
inputs.append(('analytical_counts_{:d}'.format(i), tf.float32,
(problem.batch_size, None, problem.map_crop_sizes[i],
problem.map_crop_sizes[i], problem.map_channels)))
if problem.outputs.readout_maps:
for i in range(len(problem.readout_maps_crop_sizes)):
inputs.append(('readout_maps_{:d}'.format(i), tf.float32,
(problem.batch_size, None,
problem.readout_maps_crop_sizes[i],
problem.readout_maps_crop_sizes[i],
problem.readout_maps_channels)))
for i in range(len(problem.map_crop_sizes)):
inputs.append(('ego_goal_imgs_{:d}'.format(i), tf.float32,
(problem.batch_size, None, problem.map_crop_sizes[i],
problem.map_crop_sizes[i], problem.goal_channels)))
for s in ['sum_num', 'sum_denom', 'max_denom']:
inputs.append(('running_'+s+'_{:d}'.format(i), tf.float32,
(problem.batch_size, 1, problem.map_crop_sizes[i],
problem.map_crop_sizes[i], problem.map_channels)))
inputs.append(('incremental_locs', tf.float32,
(problem.batch_size, None, 2)))
inputs.append(('incremental_thetas', tf.float32,
(problem.batch_size, None, 1)))
inputs.append(('step_number', tf.int32, (1, None, 1)))
inputs.append(('node_ids', tf.int32, (problem.batch_size, None,
problem.node_ids_dim)))
inputs.append(('perturbs', tf.float32, (problem.batch_size, None,
problem.perturbs_dim)))
# For plotting result plots
inputs.append(('loc_on_map', tf.float32, (problem.batch_size, None, 2)))
inputs.append(('gt_dist_to_goal', tf.float32, (problem.batch_size, None, 1)))
step_input_data, _ = tf_utils.setup_inputs(inputs)
inputs = []
inputs.append(('action', tf.int32, (problem.batch_size, None, problem.num_actions)))
train_data, _ = tf_utils.setup_inputs(inputs)
train_data.update(step_input_data)
train_data.update(common_input_data)
return common_input_data, step_input_data, train_data
def readout_general(multi_scale_belief, num_neurons, strides, layers_per_block,
kernel_size, batch_norm_is_training_op, wt_decay):
multi_scale_belief = tf.stop_gradient(multi_scale_belief)
with tf.variable_scope('readout_maps_deconv'):
x, outs = deconv(multi_scale_belief, batch_norm_is_training_op,
wt_decay=wt_decay, neurons=num_neurons, strides=strides,
layers_per_block=layers_per_block, kernel_size=kernel_size,
conv_fn=slim.conv2d_transpose, offset=0,
name='readout_maps_deconv')
probs = tf.sigmoid(x)
return x, probs
def running_combine(fss_logits, confs_probs, incremental_locs,
incremental_thetas, previous_sum_num, previous_sum_denom,
previous_max_denom, map_size, num_steps):
# fss_logits is B x N x H x W x C
# confs_logits is B x N x H x W x C
# incremental_locs is B x N x 2
# incremental_thetas is B x N x 1
# previous_sum_num etc is B x 1 x H x W x C
with tf.name_scope('combine_{:d}'.format(num_steps)):
running_sum_nums_ = []; running_sum_denoms_ = [];
running_max_denoms_ = [];
fss_logits_ = tf.unstack(fss_logits, axis=1, num=num_steps)
confs_probs_ = tf.unstack(confs_probs, axis=1, num=num_steps)
incremental_locs_ = tf.unstack(incremental_locs, axis=1, num=num_steps)
incremental_thetas_ = tf.unstack(incremental_thetas, axis=1, num=num_steps)
running_sum_num = tf.unstack(previous_sum_num, axis=1, num=1)[0]
running_sum_denom = tf.unstack(previous_sum_denom, axis=1, num=1)[0]
running_max_denom = tf.unstack(previous_max_denom, axis=1, num=1)[0]
for i in range(num_steps):
# Rotate the previous running_num and running_denom
running_sum_num, running_sum_denom, running_max_denom = rotate_preds(
incremental_locs_[i], incremental_thetas_[i], map_size,
[running_sum_num, running_sum_denom, running_max_denom],
output_valid_mask=False)[0]
# print i, num_steps, running_sum_num.get_shape().as_list()
running_sum_num = running_sum_num + fss_logits_[i] * confs_probs_[i]
running_sum_denom = running_sum_denom + confs_probs_[i]
running_max_denom = tf.maximum(running_max_denom, confs_probs_[i])
running_sum_nums_.append(running_sum_num)
running_sum_denoms_.append(running_sum_denom)
running_max_denoms_.append(running_max_denom)
running_sum_nums = tf.stack(running_sum_nums_, axis=1)
running_sum_denoms = tf.stack(running_sum_denoms_, axis=1)
running_max_denoms = tf.stack(running_max_denoms_, axis=1)
return running_sum_nums, running_sum_denoms, running_max_denoms
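# In words: each step's egocentric prediction is first warped into the current
# frame (rotate_preds) and then folded into a confidence-weighted running
# average; the occupancy estimate read off later is
# running_sum_num / max(running_sum_denom, eps), with running_max_denom kept
# as a per-cell confidence.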
def get_map_from_images(imgs, mapper_arch, task_params, freeze_conv, wt_decay,
is_training, batch_norm_is_training_op, num_maps,
split_maps=True):
# Hit image with a resnet.
n_views = len(task_params.aux_delta_thetas) + 1
out = utils.Foo()
images_reshaped = tf.reshape(imgs,
shape=[-1, task_params.img_height,
task_params.img_width,
task_params.img_channels], name='re_image')
x, out.vars_to_restore = get_repr_from_image(
images_reshaped, task_params.modalities, task_params.data_augment,
mapper_arch.encoder, freeze_conv, wt_decay, is_training)
# Reshape into nice things so that these can be accumulated over time steps
# for faster backprop.
sh_before = x.get_shape().as_list()
out.encoder_output = tf.reshape(x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:])
x = tf.reshape(out.encoder_output, shape=[-1] + sh_before[1:])
# Add a layer to reduce dimensions for a fc layer.
if mapper_arch.dim_reduce_neurons > 0:
ks = 1; neurons = mapper_arch.dim_reduce_neurons;
init_var = np.sqrt(2.0/(ks**2)/neurons)
batch_norm_param = mapper_arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
out.conv_feat = slim.conv2d(x, neurons, kernel_size=ks, stride=1,
normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_param,
padding='SAME', scope='dim_reduce',
weights_regularizer=slim.l2_regularizer(wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var))
reshape_conv_feat = slim.flatten(out.conv_feat)
sh = reshape_conv_feat.get_shape().as_list()
out.reshape_conv_feat = tf.reshape(reshape_conv_feat, shape=[-1, sh[1]*n_views])
with tf.variable_scope('fc'):
# Fully connected layers to compute the representation in top-view space.
fc_batch_norm_param = {'center': True, 'scale': True,
'activation_fn':tf.nn.relu,
'is_training': batch_norm_is_training_op}
f = out.reshape_conv_feat
out_neurons = (mapper_arch.fc_out_size**2)*mapper_arch.fc_out_neurons
neurons = mapper_arch.fc_neurons + [out_neurons]
f, _ = tf_utils.fc_network(f, neurons=neurons, wt_decay=wt_decay,
name='fc', offset=0,
batch_norm_param=fc_batch_norm_param,
is_training=is_training,
dropout_ratio=mapper_arch.fc_dropout)
f = tf.reshape(f, shape=[-1, mapper_arch.fc_out_size,
mapper_arch.fc_out_size,
mapper_arch.fc_out_neurons], name='re_fc')
# Use pool5 to predict the free space map via deconv layers.
with tf.variable_scope('deconv'):
x, outs = deconv(f, batch_norm_is_training_op, wt_decay=wt_decay,
neurons=mapper_arch.deconv_neurons,
strides=mapper_arch.deconv_strides,
layers_per_block=mapper_arch.deconv_layers_per_block,
kernel_size=mapper_arch.deconv_kernel_size,
conv_fn=slim.conv2d_transpose, offset=0, name='deconv')
# Reshape x the right way.
sh = x.get_shape().as_list()
x = tf.reshape(x, shape=[task_params.batch_size, -1] + sh[1:])
out.deconv_output = x
# Separate out the map and the confidence predictions, pass the confidence
# through a sigmoid.
if split_maps:
with tf.name_scope('split'):
out_all = tf.split(value=x, axis=4, num_or_size_splits=2*num_maps)
out.fss_logits = out_all[:num_maps]
out.confs_logits = out_all[num_maps:]
with tf.name_scope('sigmoid'):
out.confs_probs = [tf.nn.sigmoid(x) for x in out.confs_logits]
return out
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
assert(args.arch.multi_scale), 'removed support for old single scale code.'
# Set up the model.
tf.set_random_seed(args.solver.seed)
task_params = args.navtask.task_params
batch_norm_is_training_op = \
tf.placeholder_with_default(batch_norm_is_training, shape=[],
name='batch_norm_is_training_op')
# Setup the inputs
m.input_tensors = {}
m.train_ops = {}
m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \
_inputs(task_params)
m.init_fn = None
if task_params.input_type == 'vision':
m.vision_ops = get_map_from_images(
m.input_tensors['step']['imgs'], args.mapper_arch,
task_params, args.solver.freeze_conv,
args.solver.wt_decay, is_training, batch_norm_is_training_op,
num_maps=len(task_params.map_crop_sizes))
# Load variables from snapshot if needed.
if args.solver.pretrained_path is not None:
m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path,
m.vision_ops.vars_to_restore)
# Set up caching of vision features if needed.
if args.solver.freeze_conv:
m.train_ops['step_data_cache'] = [m.vision_ops.encoder_output]
else:
m.train_ops['step_data_cache'] = []
# Set up blobs that are needed for the computation in rest of the graph.
m.ego_map_ops = m.vision_ops.fss_logits
m.coverage_ops = m.vision_ops.confs_probs
# Zero pad these to make them same size as what the planner expects.
for i in range(len(m.ego_map_ops)):
if args.mapper_arch.pad_map_with_zeros_each[i] > 0:
paddings = np.zeros((5,2), dtype=np.int32)
paddings[2:4,:] = args.mapper_arch.pad_map_with_zeros_each[i]
paddings_op = tf.constant(paddings, dtype=tf.int32)
m.ego_map_ops[i] = tf.pad(m.ego_map_ops[i], paddings=paddings_op)
m.coverage_ops[i] = tf.pad(m.coverage_ops[i], paddings=paddings_op)
elif task_params.input_type == 'analytical_counts':
m.ego_map_ops = []; m.coverage_ops = []
for i in range(len(task_params.map_crop_sizes)):
ego_map_op = m.input_tensors['step']['analytical_counts_{:d}'.format(i)]
coverage_op = tf.cast(tf.greater_equal(
tf.reduce_max(ego_map_op, reduction_indices=[4],
keep_dims=True), 1), tf.float32)
coverage_op = tf.ones_like(ego_map_op) * coverage_op
m.ego_map_ops.append(ego_map_op)
m.coverage_ops.append(coverage_op)
m.train_ops['step_data_cache'] = []
num_steps = task_params.num_steps
num_goals = task_params.num_goals
map_crop_size_ops = []
for map_crop_size in task_params.map_crop_sizes:
map_crop_size_ops.append(tf.constant(map_crop_size, dtype=tf.int32, shape=(2,)))
with tf.name_scope('check_size'):
is_single_step = tf.equal(tf.unstack(tf.shape(m.ego_map_ops[0]), num=5)[1], 1)
fr_ops = []; value_ops = [];
fr_intermediate_ops = []; value_intermediate_ops = [];
crop_value_ops = [];
resize_crop_value_ops = [];
confs = []; occupancys = [];
previous_value_op = None
updated_state = []; state_names = [];
for i in range(len(task_params.map_crop_sizes)):
map_crop_size = task_params.map_crop_sizes[i]
with tf.variable_scope('scale_{:d}'.format(i)):
# Accumulate the map.
fn = lambda ns: running_combine(
m.ego_map_ops[i],
m.coverage_ops[i],
m.input_tensors['step']['incremental_locs'] * task_params.map_scales[i],
m.input_tensors['step']['incremental_thetas'],
m.input_tensors['step']['running_sum_num_{:d}'.format(i)],
m.input_tensors['step']['running_sum_denom_{:d}'.format(i)],
m.input_tensors['step']['running_max_denom_{:d}'.format(i)],
map_crop_size, ns)
running_sum_num, running_sum_denom, running_max_denom = \
tf.cond(is_single_step, lambda: fn(1), lambda: fn(num_steps*num_goals))
updated_state += [running_sum_num, running_sum_denom, running_max_denom]
state_names += ['running_sum_num_{:d}'.format(i),
'running_sum_denom_{:d}'.format(i),
'running_max_denom_{:d}'.format(i)]
# Compute the occupancy and confidence from the accumulated map.
occupancy = running_sum_num / tf.maximum(running_sum_denom, 0.001)
conf = running_max_denom
# Concat occupancy, confidence and the goal.
with tf.name_scope('concat'):
sh = [-1, map_crop_size, map_crop_size, task_params.map_channels]
occupancy = tf.reshape(occupancy, shape=sh)
conf = tf.reshape(conf, shape=sh)
sh = [-1, map_crop_size, map_crop_size, task_params.goal_channels]
goal = tf.reshape(m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)], shape=sh)
to_concat = [occupancy, conf, goal]
if previous_value_op is not None:
to_concat.append(previous_value_op)
x = tf.concat(to_concat, 3)
# Pass the map, previous rewards and the goal through a few convolutional
# layers to get fR.
fr_op, fr_intermediate_op = fr_v2(
x, output_neurons=args.arch.fr_neurons,
inside_neurons=args.arch.fr_inside_neurons,
is_training=batch_norm_is_training_op, name='fr',
wt_decay=args.solver.wt_decay, stride=args.arch.fr_stride)
# Do Value Iteration on the fR
if args.arch.vin_num_iters > 0:
value_op, value_intermediate_op = value_iteration_network(
fr_op, num_iters=args.arch.vin_num_iters,
val_neurons=args.arch.vin_val_neurons,
action_neurons=args.arch.vin_action_neurons,
kernel_size=args.arch.vin_ks, share_wts=args.arch.vin_share_wts,
name='vin', wt_decay=args.solver.wt_decay)
else:
value_op = fr_op
value_intermediate_op = []
# Crop out the value map; it is upsampled below for the next scale.
remove = args.arch.crop_remove_each
if remove > 0:
crop_value_op = value_op[:, remove:-remove, remove:-remove,:]
else:
crop_value_op = value_op
crop_value_op = tf.reshape(crop_value_op, shape=[-1, args.arch.value_crop_size,
args.arch.value_crop_size,
args.arch.vin_val_neurons])
if i < len(task_params.map_crop_sizes)-1:
# Resize it to the shape of the next scale.
previous_value_op = tf.image.resize_bilinear(crop_value_op,
map_crop_size_ops[i+1],
align_corners=True)
resize_crop_value_ops.append(previous_value_op)
occupancys.append(occupancy)
confs.append(conf)
value_ops.append(value_op)
crop_value_ops.append(crop_value_op)
fr_ops.append(fr_op)
fr_intermediate_ops.append(fr_intermediate_op)
m.value_ops = value_ops
m.value_intermediate_ops = value_intermediate_ops
m.fr_ops = fr_ops
m.fr_intermediate_ops = fr_intermediate_ops
m.final_value_op = crop_value_op
m.crop_value_ops = crop_value_ops
m.resize_crop_value_ops = resize_crop_value_ops
m.confs = confs
m.occupancys = occupancys
sh = [-1, args.arch.vin_val_neurons*((args.arch.value_crop_size)**2)]
m.value_features_op = tf.reshape(m.final_value_op, sh, name='reshape_value_op')
# Determine what action to take.
with tf.variable_scope('action_pred'):
batch_norm_param = args.arch.pred_batch_norm_param
if batch_norm_param is not None:
batch_norm_param['is_training'] = batch_norm_is_training_op
m.action_logits_op, _ = tf_utils.fc_network(
m.value_features_op, neurons=args.arch.pred_neurons,
wt_decay=args.solver.wt_decay, name='pred', offset=0,
num_pred=task_params.num_actions,
batch_norm_param=batch_norm_param)
m.action_prob_op = tf.nn.softmax(m.action_logits_op)
init_state = tf.constant(0., dtype=tf.float32, shape=[
task_params.batch_size, 1, map_crop_size, map_crop_size,
task_params.map_channels])
m.train_ops['state_names'] = state_names
m.train_ops['updated_state'] = updated_state
m.train_ops['init_state'] = [init_state for _ in updated_state]
m.train_ops['step'] = m.action_prob_op
m.train_ops['common'] = [m.input_tensors['common']['orig_maps'],
m.input_tensors['common']['goal_loc']]
m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op
m.loss_ops = []; m.loss_ops_names = [];
if args.arch.readout_maps:
with tf.name_scope('readout_maps'):
all_occupancys = tf.concat(m.occupancys + m.confs, 3)
readout_maps, probs = readout_general(
all_occupancys, num_neurons=args.arch.rom_arch.num_neurons,
strides=args.arch.rom_arch.strides,
layers_per_block=args.arch.rom_arch.layers_per_block,
kernel_size=args.arch.rom_arch.kernel_size,
batch_norm_is_training_op=batch_norm_is_training_op,
wt_decay=args.solver.wt_decay)
gt_ego_maps = [m.input_tensors['step']['readout_maps_{:d}'.format(i)]
for i in range(len(task_params.readout_maps_crop_sizes))]
m.readout_maps_gt = tf.concat(gt_ego_maps, 4)
gt_shape = tf.shape(m.readout_maps_gt)
m.readout_maps_logits = tf.reshape(readout_maps, gt_shape)
m.readout_maps_probs = tf.reshape(probs, gt_shape)
# Add a loss op
m.readout_maps_loss_op = tf.losses.sigmoid_cross_entropy(
tf.reshape(m.readout_maps_gt, [-1, len(task_params.readout_maps_crop_sizes)]),
tf.reshape(readout_maps, [-1, len(task_params.readout_maps_crop_sizes)]),
scope='loss')
m.readout_maps_loss_op = 10.*m.readout_maps_loss_op
ewma_decay = 0.99 if is_training else 0.0
weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32,
name='weight')
m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
compute_losses_multi_or(m.action_logits_op,
m.input_tensors['train']['action'], weights=weight,
num_actions=task_params.num_actions,
data_loss_wt=args.solver.data_loss_wt,
reg_loss_wt=args.solver.reg_loss_wt,
ewma_decay=ewma_decay)
if args.arch.readout_maps:
m.total_loss_op = m.total_loss_op + m.readout_maps_loss_op
m.loss_ops += [m.readout_maps_loss_op]
m.loss_ops_names += ['readout_maps_loss']
m.loss_ops += [m.reg_loss_op, m.data_loss_op, m.total_loss_op]
m.loss_ops_names += ['reg_loss', 'data_loss', 'total_loss']
if args.solver.freeze_conv:
vars_to_optimize = list(set(tf.trainable_variables()) -
set(m.vision_ops.vars_to_restore))
else:
vars_to_optimize = None
m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
m.sync_optimizer = tf_utils.setup_training(
m.total_loss_op,
args.solver.initial_learning_rate,
args.solver.steps_per_decay,
args.solver.learning_rate_decay,
args.solver.momentum,
args.solver.max_steps,
args.solver.sync,
args.solver.adjust_lr_sync,
args.solver.num_workers,
args.solver.task,
vars_to_optimize=vars_to_optimize,
clip_gradient_norm=args.solver.clip_gradient_norm,
typ=args.solver.typ, momentum2=args.solver.momentum2,
adam_eps=args.solver.adam_eps)
if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k,
m.global_step_op)
elif args.arch.sample_gt_prob_type == 'zero':
m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)
elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
step = int(args.arch.sample_gt_prob_type.split('_')[1])
m.sample_gt_prob_op = tf_utils.step_gt_prob(
step, m.input_tensors['step']['step_number'][0,0,0])
m.sample_action_type = args.arch.action_sample_type
m.sample_action_combine_type = args.arch.action_sample_combine_type
m.summary_ops = {
summary_mode: _add_summaries(m, args, summary_mode,
args.summary.arop_full_summary_iters)}
m.init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
write_version=tf.train.SaverDef.V2)
return m
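# Illustrative sketch (invented shapes, not part of the pipeline above): how
# the zero padding in the vision branch grows a 5-D ego map on its spatial
# dimensions (dims 2 and 3) so that it matches what the planner expects.
import numpy as np
import tensorflow as tf
ego_map = tf.zeros([4, 1, 28, 28, 8])  # batch x steps x H x W x channels.
paddings = np.zeros((5, 2), dtype=np.int32)
paddings[2:4, :] = 2  # Pad H and W by 2 on each side.
padded = tf.pad(ego_map, tf.constant(paddings, dtype=tf.int32))
# padded has static shape [4, 1, 32, 32, 8].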
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Code for setting up summaries for CMP.
"""
import sys, os, numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim import arg_scope
import logging
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
from src import utils
import src.file_utils as fu
import tfcode.nav_utils as nu
def _vis_readout_maps(outputs, global_step, output_dir, metric_summary, N):
# outputs is [gt_map, pred_map]:
if N >= 0:
outputs = outputs[:N]
N = len(outputs)
plt.set_cmap('jet')
fig, axes = utils.subplot(plt, (N, outputs[0][0].shape[4]*2), (5,5))
axes = axes.ravel()[::-1].tolist()
for i in range(N):
gt_map, pred_map = outputs[i]
for j in [0]:
for k in range(gt_map.shape[4]):
# Display something like the midpoint of the trajectory.
id = np.int(gt_map.shape[1]/2)
ax = axes.pop();
ax.imshow(gt_map[j,id,:,:,k], origin='lower', interpolation='none',
vmin=0., vmax=1.)
ax.set_axis_off();
if i == 0: ax.set_title('gt_map')
ax = axes.pop();
ax.imshow(pred_map[j,id,:,:,k], origin='lower', interpolation='none',
vmin=0., vmax=1.)
ax.set_axis_off();
if i == 0: ax.set_title('pred_map')
file_name = os.path.join(output_dir, 'readout_map_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
plt.close(fig)
def _vis(outputs, global_step, output_dir, metric_summary, N):
# Plot the value map and goal for various maps to see if the model is
# learning anything useful.
#
# outputs is [values, goals, maps, occupancy, conf].
#
if N >= 0:
outputs = outputs[:N]
N = len(outputs)
plt.set_cmap('jet')
fig, axes = utils.subplot(plt, (N, outputs[0][0].shape[4]*5), (5,5))
axes = axes.ravel()[::-1].tolist()
for i in range(N):
values, goals, maps, occupancy, conf = outputs[i]
for j in [0]:
for k in range(values.shape[4]):
# Display something like the midpoint of the trajectory.
id = np.int(values.shape[1]/2)
ax = axes.pop();
ax.imshow(goals[j,id,:,:,k], origin='lower', interpolation='none')
ax.set_axis_off();
if i == 0: ax.set_title('goal')
ax = axes.pop();
ax.imshow(occupancy[j,id,:,:,k], origin='lower', interpolation='none')
ax.set_axis_off();
if i == 0: ax.set_title('occupancy')
ax = axes.pop();
ax.imshow(conf[j,id,:,:,k], origin='lower', interpolation='none',
vmin=0., vmax=1.)
ax.set_axis_off();
if i == 0: ax.set_title('conf')
ax = axes.pop();
ax.imshow(values[j,id,:,:,k], origin='lower', interpolation='none')
ax.set_axis_off();
if i == 0: ax.set_title('value')
ax = axes.pop();
ax.imshow(maps[j,id,:,:,k], origin='lower', interpolation='none')
ax.set_axis_off();
if i == 0: ax.set_title('incr map')
file_name = os.path.join(output_dir, 'value_vis_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
plt.close(fig)
def _summary_vis(m, batch_size, num_steps, arop_full_summary_iters):
arop = []; arop_summary_iters = []; arop_eval_fns = [];
vis_value_ops = []; vis_goal_ops = []; vis_map_ops = [];
vis_occupancy_ops = []; vis_conf_ops = [];
for i, val_op in enumerate(m.value_ops):
vis_value_op = tf.reduce_mean(tf.abs(val_op), axis=3, keep_dims=True)
vis_value_ops.append(vis_value_op)
vis_occupancy_op = tf.reduce_mean(tf.abs(m.occupancys[i]), 3, True)
vis_occupancy_ops.append(vis_occupancy_op)
vis_conf_op = tf.reduce_max(tf.abs(m.confs[i]), axis=3, keep_dims=True)
vis_conf_ops.append(vis_conf_op)
ego_goal_imgs_i_op = m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)]
vis_goal_op = tf.reduce_max(ego_goal_imgs_i_op, 4, True)
vis_goal_ops.append(vis_goal_op)
vis_map_op = tf.reduce_mean(tf.abs(m.ego_map_ops[i]), 4, True)
vis_map_ops.append(vis_map_op)
vis_goal_ops = tf.concat(vis_goal_ops, 4)
vis_map_ops = tf.concat(vis_map_ops, 4)
vis_value_ops = tf.concat(vis_value_ops, 3)
vis_occupancy_ops = tf.concat(vis_occupancy_ops, 3)
vis_conf_ops = tf.concat(vis_conf_ops, 3)
sh = tf.unstack(tf.shape(vis_value_ops))[1:]
vis_value_ops = tf.reshape(vis_value_ops, shape=[batch_size, -1] + sh)
sh = tf.unstack(tf.shape(vis_conf_ops))[1:]
vis_conf_ops = tf.reshape(vis_conf_ops, shape=[batch_size, -1] + sh)
sh = tf.unstack(tf.shape(vis_occupancy_ops))[1:]
vis_occupancy_ops = tf.reshape(vis_occupancy_ops, shape=[batch_size,-1] + sh)
# To save memory, only return the time steps that need to be visualized;
# this gives roughly a factor of 32 saving in CPU memory.
id = np.int(num_steps/2)
vis_goal_ops = tf.expand_dims(vis_goal_ops[:,id,:,:,:], axis=1)
vis_map_ops = tf.expand_dims(vis_map_ops[:,id,:,:,:], axis=1)
vis_value_ops = tf.expand_dims(vis_value_ops[:,id,:,:,:], axis=1)
vis_conf_ops = tf.expand_dims(vis_conf_ops[:,id,:,:,:], axis=1)
vis_occupancy_ops = tf.expand_dims(vis_occupancy_ops[:,id,:,:,:], axis=1)
arop += [[vis_value_ops, vis_goal_ops, vis_map_ops, vis_occupancy_ops,
vis_conf_ops]]
arop_summary_iters += [arop_full_summary_iters]
arop_eval_fns += [_vis]
return arop, arop_summary_iters, arop_eval_fns
def _summary_readout_maps(m, num_steps, arop_full_summary_iters):
arop = []; arop_summary_iters = []; arop_eval_fns = [];
id = np.int(num_steps-1)
vis_readout_maps_gt = m.readout_maps_gt
vis_readout_maps_prob = tf.reshape(m.readout_maps_probs,
shape=tf.shape(vis_readout_maps_gt))
vis_readout_maps_gt = tf.expand_dims(vis_readout_maps_gt[:,id,:,:,:], 1)
vis_readout_maps_prob = tf.expand_dims(vis_readout_maps_prob[:,id,:,:,:], 1)
arop += [[vis_readout_maps_gt, vis_readout_maps_prob]]
arop_summary_iters += [arop_full_summary_iters]
arop_eval_fns += [_vis_readout_maps]
return arop, arop_summary_iters, arop_eval_fns
def _add_summaries(m, args, summary_mode, arop_full_summary_iters):
task_params = args.navtask.task_params
summarize_ops = [m.lr_op, m.global_step_op, m.sample_gt_prob_op] + \
m.loss_ops + m.acc_ops
summarize_names = ['lr', 'global_step', 'sample_gt_prob_op'] + \
m.loss_ops_names + ['acc_{:d}'.format(i) for i in range(len(m.acc_ops))]
to_aggregate = [0, 0, 0] + [1]*len(m.loss_ops_names) + [1]*len(m.acc_ops)
scope_name = 'summary'
with tf.name_scope(scope_name):
s_ops = nu.add_default_summaries(summary_mode, arop_full_summary_iters,
summarize_ops, summarize_names,
to_aggregate, m.action_prob_op,
m.input_tensors, scope_name=scope_name)
if summary_mode == 'val':
arop, arop_summary_iters, arop_eval_fns = _summary_vis(
m, task_params.batch_size, task_params.num_steps,
arop_full_summary_iters)
s_ops.additional_return_ops += arop
s_ops.arop_summary_iters += arop_summary_iters
s_ops.arop_eval_fns += arop_eval_fns
if args.arch.readout_maps:
arop, arop_summary_iters, arop_eval_fns = _summary_readout_maps(
m, task_params.num_steps, arop_full_summary_iters)
s_ops.additional_return_ops += arop
s_ops.arop_summary_iters += arop_summary_iters
s_ops.arop_eval_fns += arop_eval_fns
return s_ops
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for setting up the CMP graph.
"""
import os, numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim import arg_scope
import logging
from src import utils
import src.file_utils as fu
from tfcode import tf_utils
resnet_v2 = tf_utils.resnet_v2
custom_residual_block = tf_utils.custom_residual_block
def value_iteration_network(
fr, num_iters, val_neurons, action_neurons, kernel_size, share_wts=False,
name='vin', wt_decay=0.0001, activation_fn=None, shape_aware=False):
"""
Constructs a Value Iteration Network, convolutions and max pooling across
channels.
Input:
fr: NxWxHxC
val_neurons: Number of channels for maintaining the value.
action_neurons: Computes action_neurons * val_neurons at each iteration to
max pool over.
Output:
value image: NxHxWx(val_neurons)
"""
init_var = np.sqrt(2.0/(kernel_size**2)/(val_neurons*action_neurons))
vals = []
with tf.variable_scope(name) as varscope:
if not shape_aware:
fr_shape = tf.unstack(tf.shape(fr))
val_shape = tf.stack(fr_shape[:-1] + [val_neurons])
val = tf.zeros(val_shape, name='val_init')
else:
val = tf.expand_dims(tf.zeros_like(fr[:,:,:,0]), dim=-1) * \
tf.constant(0., dtype=tf.float32, shape=[1,1,1,val_neurons])
val_shape = tf.shape(val)
vals.append(val)
for i in range(num_iters):
if share_wts:
# The first value iteration may be special, so it can have its own
# parameters.
scope = 'conv'
if i == 0: scope = 'conv_0'
if i > 1: varscope.reuse_variables()
else:
scope = 'conv_{:d}'.format(i)
val = slim.conv2d(tf.concat([val, fr], 3, name='concat_{:d}'.format(i)),
num_outputs=action_neurons*val_neurons,
kernel_size=kernel_size, stride=1, activation_fn=activation_fn,
scope=scope, normalizer_fn=None,
weights_regularizer=slim.l2_regularizer(wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var),
biases_initializer=tf.zeros_initializer())
val = tf.reshape(val, [-1, action_neurons*val_neurons, 1, 1],
name='re_{:d}'.format(i))
val = slim.max_pool2d(val, kernel_size=[action_neurons,1],
stride=[action_neurons,1], padding='VALID',
scope='val_{:d}'.format(i))
val = tf.reshape(val, val_shape, name='unre_{:d}'.format(i))
vals.append(val)
return val, vals
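# Minimal numpy sketch of the max-pool-over-actions trick used above: the
# convolution produces action_neurons*val_neurons channels, and a
# [action_neurons, 1] max pool with matching stride keeps the best action for
# each value channel. The sizes below are invented for illustration.
import numpy as np
A, V = 3, 2  # action_neurons, val_neurons.
q = np.arange(A * V, dtype=np.float32)  # Q-values at one spatial location.
v = q.reshape(V, A).max(axis=1)  # Max over each group of A action channels.
# v.shape == (V,), matching the val_neurons channels that are fed back in.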
def rotate_preds(loc_on_map, relative_theta, map_size, preds,
output_valid_mask):
with tf.name_scope('rotate'):
flow_op = tf_utils.get_flow(loc_on_map, relative_theta, map_size=map_size)
if type(preds) != list:
rotated_preds, valid_mask_warps = tf_utils.dense_resample(preds, flow_op,
output_valid_mask)
else:
rotated_preds = []; valid_mask_warps = []
for pred in preds:
rotated_pred, valid_mask_warp = tf_utils.dense_resample(pred, flow_op,
output_valid_mask)
rotated_preds.append(rotated_pred)
valid_mask_warps.append(valid_mask_warp)
return rotated_preds, valid_mask_warps
def get_visual_frustum(map_size, shape_like, expand_dims=[0,0]):
with tf.name_scope('visual_frustum'):
l = np.tril(np.ones((map_size, map_size))); l = l + l[:,::-1]
l = (l == 2).astype(np.float32)
for e in expand_dims:
l = np.expand_dims(l, axis=e)
confs_probs = tf.constant(l, dtype=tf.float32)
confs_probs = tf.ones_like(shape_like, dtype=tf.float32) * confs_probs
return confs_probs
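# Quick numpy illustration (invented map size) of the triangle construction
# above: tril(ones) plus its left-right mirror equals 2 exactly on a wedge
# that is empty near row 0 and widens towards the last row, approximating the
# agent's field of view.
import numpy as np
l = np.tril(np.ones((5, 5))); l = l + l[:, ::-1]
frustum = (l == 2).astype(np.float32)
# Rows of frustum have 0, 0, 1, 3 and 5 visible cells respectively.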
def deconv(x, is_training, wt_decay, neurons, strides, layers_per_block,
kernel_size, conv_fn, name, offset=0):
"""Generates a up sampling network with residual connections.
"""
batch_norm_param = {'center': True, 'scale': True,
'activation_fn': tf.nn.relu,
'is_training': is_training}
outs = []
for i, (neuron, stride) in enumerate(zip(neurons, strides)):
for s in range(layers_per_block):
scope = '{:s}_{:d}_{:d}'.format(name, i+1+offset,s+1)
x = custom_residual_block(x, neuron, kernel_size, stride, scope,
is_training, wt_decay, use_residual=True,
residual_stride_conv=True, conv_fn=conv_fn,
batch_norm_param=batch_norm_param)
stride = 1
outs.append((x,True))
return x, outs
def fr_v2(x, output_neurons, inside_neurons, is_training, name='fr',
wt_decay=0.0001, stride=1, updates_collections=tf.GraphKeys.UPDATE_OPS):
"""Performs fusion of information between the map and the reward map.
Inputs
x: NxHxWxC1
Outputs
fr map: NxHxWx(output_neurons)
"""
if type(stride) != list:
stride = [stride]
with slim.arg_scope(resnet_v2.resnet_utils.resnet_arg_scope(
is_training=is_training, weight_decay=wt_decay)):
with slim.arg_scope([slim.batch_norm], updates_collections=updates_collections) as arg_sc:
# Make the convolution's normalizer_params use the same updates_collections.
for k in arg_sc.keys():
if 'convolution' in k:
arg_sc[k]['normalizer_params']['updates_collections'] = updates_collections
with slim.arg_scope(arg_sc):
bottleneck = resnet_v2.bottleneck
blocks = []
for i, s in enumerate(stride):
b = resnet_v2.resnet_utils.Block(
'block{:d}'.format(i + 1), bottleneck, [{
'depth': output_neurons,
'depth_bottleneck': inside_neurons,
'stride': stride[i]
}])
blocks.append(b)
x, outs = resnet_v2.resnet_v2(x, blocks, num_classes=None, global_pool=False,
output_stride=None, include_root_block=False,
reuse=False, scope=name)
return x, outs
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various losses for training navigation agents.
Defines various loss functions for navigation agents,
compute_losses_multi_or.
"""
import os, numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim import arg_scope
from tensorflow.contrib.slim.nets import resnet_v2
from tensorflow.python.training import moving_averages
import logging
from src import utils
import src.file_utils as fu
from tfcode import tf_utils
def compute_losses_multi_or(logits, actions_one_hot, weights=None,
num_actions=-1, data_loss_wt=1., reg_loss_wt=1.,
ewma_decay=0.99, reg_loss_op=None):
assert(num_actions > 0), 'num_actions must be specified and must be > 0.'
with tf.name_scope('loss'):
if weights is None:
weights = tf.ones_like(actions_one_hot, dtype=tf.float32, name='weight')
actions_one_hot = tf.cast(tf.reshape(actions_one_hot, [-1, num_actions],
're_actions_one_hot'), tf.float32)
weights = tf.reduce_sum(tf.reshape(weights, [-1, num_actions], 're_weight'),
reduction_indices=1)
total = tf.reduce_sum(weights)
action_prob = tf.nn.softmax(logits)
action_prob = tf.reduce_sum(tf.multiply(action_prob, actions_one_hot),
reduction_indices=1)
example_loss = -tf.log(tf.maximum(tf.constant(1e-4), action_prob))
data_loss_op = tf.reduce_sum(example_loss * weights) / total
if reg_loss_op is None:
if reg_loss_wt > 0:
reg_loss_op = tf.add_n(tf.losses.get_regularization_losses())
else:
reg_loss_op = tf.constant(0.)
if reg_loss_wt > 0:
total_loss_op = data_loss_wt*data_loss_op + reg_loss_wt*reg_loss_op
else:
total_loss_op = data_loss_wt*data_loss_op
is_correct = tf.cast(tf.greater(action_prob, 0.5, name='pred_class'), tf.float32)
acc_op = tf.reduce_sum(is_correct*weights) / total
ewma_acc_op = moving_averages.weighted_moving_average(
acc_op, ewma_decay, weight=total, name='ewma_acc')
acc_ops = [ewma_acc_op]
return reg_loss_op, data_loss_op, total_loss_op, acc_ops
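# Hedged numpy sketch of the data loss above: a weighted negative
# log-likelihood of the ground-truth action with probabilities floored at
# 1e-4. The logits and labels are invented.
import numpy as np
logits = np.array([[2., 0., -1.]])
one_hot = np.array([[1., 0., 0.]])
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
p_gt = (probs * one_hot).sum(axis=1)
example_loss = -np.log(np.maximum(1e-4, p_gt))
data_loss = example_loss.mean()  # Equal weights reduce to a plain mean.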
def get_repr_from_image(images_reshaped, modalities, data_augment, encoder,
freeze_conv, wt_decay, is_training):
# Pass the image through a stack of convolutional layers to obtain the pool5 features.
if modalities == ['rgb']:
with tf.name_scope('pre_rgb'):
x = (images_reshaped + 128.) / 255. # Convert to brightness between 0 and 1.
if data_augment.relight and is_training:
x = tf_utils.distort_image(x, fast_mode=data_augment.relight_fast)
x = (x-0.5)*2.0
scope_name = encoder
elif modalities == ['depth']:
with tf.name_scope('pre_d'):
d_image = images_reshaped
x = 2*(d_image[...,0] - 80.0)/100.0
y = d_image[...,1]
d_image = tf.concat([tf.expand_dims(x, -1), tf.expand_dims(y, -1)], 3)
x = d_image
scope_name = 'd_'+encoder
resnet_is_training = is_training and (not freeze_conv)
with slim.arg_scope(resnet_v2.resnet_utils.resnet_arg_scope(resnet_is_training)):
fn = getattr(tf_utils, encoder)
x, end_points = fn(x, num_classes=None, global_pool=False,
output_stride=None, reuse=None,
scope=scope_name)
vars_ = slim.get_variables_to_restore()
conv_feat = x
return conv_feat, vars_
def default_train_step_kwargs(m, obj, logdir, rng_seed, is_chief, num_steps,
iters, train_display_interval,
dagger_sample_bn_false):
train_step_kwargs = {}
train_step_kwargs['obj'] = obj
train_step_kwargs['m'] = m
# rng_data has 2 independent rngs, one for sampling episodes and one for
# sampling perturbs (so that we can make results reproducible).
train_step_kwargs['rng_data'] = [np.random.RandomState(rng_seed),
np.random.RandomState(rng_seed)]
train_step_kwargs['rng_action'] = np.random.RandomState(rng_seed)
if is_chief:
train_step_kwargs['writer'] = tf.summary.FileWriter(logdir) #, m.tf_graph)
else:
train_step_kwargs['writer'] = None
train_step_kwargs['iters'] = iters
train_step_kwargs['train_display_interval'] = train_display_interval
train_step_kwargs['num_steps'] = num_steps
train_step_kwargs['logdir'] = logdir
train_step_kwargs['dagger_sample_bn_false'] = dagger_sample_bn_false
return train_step_kwargs
# Utilities for visualizing and analysing validation output.
def save_d_at_t(outputs, global_step, output_dir, metric_summary, N):
"""Save distance to goal at all time steps.
Args:
outputs : [gt_dist_to_goal].
global_step : number of iterations.
output_dir : output directory.
metric_summary : to append scalars to summary.
N : number of outputs to process.
"""
d_at_t = np.concatenate(map(lambda x: x[0][:,:,0]*1, outputs), axis=0)
fig, axes = utils.subplot(plt, (1,1), (5,5))
axes.plot(np.arange(d_at_t.shape[1]), np.mean(d_at_t, axis=0), 'r.')
axes.set_xlabel('time step')
axes.set_ylabel('dist to next goal')
axes.grid('on')
file_name = os.path.join(output_dir, 'dist_at_t_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
file_name = os.path.join(output_dir, 'dist_at_t_{:d}.pkl'.format(global_step))
utils.save_variables(file_name, [d_at_t], ['d_at_t'], overwrite=True)
plt.close(fig)
return None
def save_all(outputs, global_step, output_dir, metric_summary, N):
"""Save numerous statistics.
Args:
outputs : [locs, goal_loc, gt_dist_to_goal, node_ids, perturbs]
global_step : number of iterations.
output_dir : output directory.
metric_summary : to append scalars to summary.
N : number of outputs to process.
"""
all_locs = np.concatenate(map(lambda x: x[0], outputs), axis=0)
all_goal_locs = np.concatenate(map(lambda x: x[1], outputs), axis=0)
all_d_at_t = np.concatenate(map(lambda x: x[2][:,:,0]*1, outputs), axis=0)
all_node_ids = np.concatenate(map(lambda x: x[3], outputs), axis=0)
all_perturbs = np.concatenate(map(lambda x: x[4], outputs), axis=0)
file_name = os.path.join(output_dir, 'all_locs_at_t_{:d}.pkl'.format(global_step))
vars = [all_locs, all_goal_locs, all_d_at_t, all_node_ids, all_perturbs]
var_names = ['all_locs', 'all_goal_locs', 'all_d_at_t', 'all_node_ids', 'all_perturbs']
utils.save_variables(file_name, vars, var_names, overwrite=True)
return None
def eval_ap(outputs, global_step, output_dir, metric_summary, N, num_classes=4):
"""Processes the collected outputs to compute AP for action prediction.
Args:
outputs : [logits, labels]
global_step : global_step.
output_dir : where to store results.
metric_summary : summary object to add summaries to.
N : number of outputs to process.
num_classes : number of classes to compute AP over, and to reshape tensors.
"""
if N >= 0:
outputs = outputs[:N]
logits = np.concatenate(map(lambda x: x[0], outputs), axis=0).reshape((-1, num_classes))
labels = np.concatenate(map(lambda x: x[1], outputs), axis=0).reshape((-1, num_classes))
aps = []
for i in range(logits.shape[1]):
ap, rec, prec = utils.calc_pr(labels[:,i], logits[:,i])
ap = ap[0]
tf_utils.add_value_to_summary(metric_summary, 'aps/ap_{:d}: '.format(i), ap)
aps.append(ap)
return aps
def eval_dist(outputs, global_step, output_dir, metric_summary, N):
"""Processes the collected outputs during validation to
1. Plot the distance over time curve.
2. Compute mean and median distances.
3. Plot a histogram of end distances.
Args:
outputs : [locs, goal_loc, gt_dist_to_goal].
global_step : global_step.
output_dir : where to store results.
metric_summary : summary object to add summaries to.
N : number of outputs to process.
"""
SUCCESS_THRESH = 3
if N >= 0:
outputs = outputs[:N]
# Plot distance at time t.
d_at_t = []
for i in range(len(outputs)):
locs, goal_loc, gt_dist_to_goal = outputs[i]
d_at_t.append(gt_dist_to_goal[:,:,0]*1)
# Plot the distance.
fig, axes = utils.subplot(plt, (1,1), (5,5))
d_at_t = np.concatenate(d_at_t, axis=0)
axes.plot(np.arange(d_at_t.shape[1]), np.mean(d_at_t, axis=0), 'r.')
axes.set_xlabel('time step')
axes.set_ylabel('dist to next goal')
axes.grid('on')
file_name = os.path.join(output_dir, 'dist_at_t_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
file_name = os.path.join(output_dir, 'dist_at_t_{:d}.pkl'.format(global_step))
utils.save_variables(file_name, [d_at_t], ['d_at_t'], overwrite=True)
plt.close(fig)
# Plot the trajectories and the init_distance and final distance.
d_inits = []
d_ends = []
for i in range(len(outputs)):
locs, goal_loc, gt_dist_to_goal = outputs[i]
d_inits.append(gt_dist_to_goal[:,0,0]*1)
d_ends.append(gt_dist_to_goal[:,-1,0]*1)
# Plot the distance.
fig, axes = utils.subplot(plt, (1,1), (5,5))
d_inits = np.concatenate(d_inits, axis=0)
d_ends = np.concatenate(d_ends, axis=0)
axes.plot(d_inits+np.random.rand(*(d_inits.shape))-0.5,
d_ends+np.random.rand(*(d_ends.shape))-0.5, '.', mec='red', mew=1.0)
axes.set_xlabel('init dist'); axes.set_ylabel('final dist');
axes.grid('on'); axes.axis('equal');
title_str = 'mean: {:0.1f}, 50: {:0.1f}, 75: {:0.2f}, s: {:0.1f}'
title_str = title_str.format(
np.mean(d_ends), np.median(d_ends), np.percentile(d_ends, q=75),
100*(np.mean(d_ends <= SUCCESS_THRESH)))
axes.set_title(title_str)
file_name = os.path.join(output_dir, 'dist_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
file_name = os.path.join(output_dir, 'dist_{:d}.pkl'.format(global_step))
utils.save_variables(file_name, [d_inits, d_ends], ['d_inits', 'd_ends'],
overwrite=True)
plt.close(fig)
# Plot the histogram of the end_distance.
with plt.style.context('seaborn-white'):
d_ends_ = np.sort(d_ends)
d_inits_ = np.sort(d_inits)
leg = [];
fig, ax = utils.subplot(plt, (1,1), (5,5))
ax.grid('on')
ax.set_xlabel('Distance from goal'); ax.xaxis.label.set_fontsize(16);
ax.set_ylabel('Fraction of data'); ax.yaxis.label.set_fontsize(16);
ax.plot(d_ends_, np.arange(d_ends_.size)*1./d_ends_.size, 'r')
ax.plot(d_inits_, np.arange(d_inits_.size)*1./d_inits_.size, 'k')
leg.append('Final'); leg.append('Init');
ax.legend(leg, fontsize='x-large');
ax.set_axis_on()
title_str = 'mean: {:0.1f}, 50: {:0.1f}, 75: {:0.2f}, s: {:0.1f}'
title_str = title_str.format(
np.mean(d_ends), np.median(d_ends), np.percentile(d_ends, q=75),
100*(np.mean(d_ends <= SUCCESS_THRESH)))
ax.set_title(title_str)
file_name = os.path.join(output_dir, 'dist_hist_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
# Log distance metrics.
tf_utils.add_value_to_summary(metric_summary, 'dists/success_init: ',
100*(np.mean(d_inits <= SUCCESS_THRESH)))
tf_utils.add_value_to_summary(metric_summary, 'dists/success_end: ',
100*(np.mean(d_ends <= SUCCESS_THRESH)))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (75): ',
np.percentile(d_inits, q=75))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (75): ',
np.percentile(d_ends, q=75))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (median): ',
np.median(d_inits))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (median): ',
np.median(d_ends))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (mean): ',
np.mean(d_inits))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (mean): ',
np.mean(d_ends))
return np.median(d_inits), np.median(d_ends), np.mean(d_inits), np.mean(d_ends), \
np.percentile(d_inits, q=75), np.percentile(d_ends, q=75), \
100*(np.mean(d_inits <= SUCCESS_THRESH)), 100*(np.mean(d_ends <= SUCCESS_THRESH))
def plot_trajectories(outputs, global_step, output_dir, metric_summary, N):
"""Processes the collected outputs during validation to plot the trajectories
in the top view.
Args:
outputs : [locs, orig_maps, goal_loc].
global_step : global_step.
output_dir : where to store results.
metric_summary : summary object to add summaries to.
N : number of outputs to process.
"""
if N >= 0:
outputs = outputs[:N]
N = len(outputs)
plt.set_cmap('gray')
fig, axes = utils.subplot(plt, (N, outputs[0][1].shape[0]), (5,5))
axes = axes.ravel()[::-1].tolist()
for i in range(N):
locs, orig_maps, goal_loc = outputs[i]
is_semantic = np.isnan(goal_loc[0,0,1])
for j in range(orig_maps.shape[0]):
ax = axes.pop();
ax.plot(locs[j,0,0], locs[j,0,1], 'ys')
# Plot one by one, so that they come in different colors.
for k in range(goal_loc.shape[1]):
if not is_semantic:
ax.plot(goal_loc[j,k,0], goal_loc[j,k,1], 's')
if False:
ax.plot(locs[j,:,0], locs[j,:,1], 'r.', ms=3)
ax.imshow(orig_maps[j,0,:,:,0], origin='lower')
ax.set_axis_off();
else:
ax.scatter(locs[j,:,0], locs[j,:,1], c=np.arange(locs.shape[1]),
cmap='jet', s=10, lw=0)
ax.imshow(orig_maps[j,0,:,:,0], origin='lower', vmin=-1.0, vmax=2.0)
if not is_semantic:
xymin = np.minimum(np.min(goal_loc[j,:,:], axis=0), np.min(locs[j,:,:], axis=0))
xymax = np.maximum(np.max(goal_loc[j,:,:], axis=0), np.max(locs[j,:,:], axis=0))
else:
xymin = np.min(locs[j,:,:], axis=0)
xymax = np.max(locs[j,:,:], axis=0)
xy1 = (xymax+xymin)/2. - np.maximum(np.max(xymax-xymin), 12)
xy2 = (xymax+xymin)/2. + np.maximum(np.max(xymax-xymin), 12)
ax.set_xlim([xy1[0], xy2[0]])
ax.set_ylim([xy1[1], xy2[1]])
ax.set_axis_off()
file_name = os.path.join(output_dir, 'trajectory_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
plt.close(fig)
return None
def add_default_summaries(mode, arop_full_summary_iters, summarize_ops,
summarize_names, to_aggregate, action_prob_op,
input_tensors, scope_name):
assert(mode == 'train' or mode == 'val' or mode == 'test'), \
'add_default_summaries mode must be one of train, val, or test.'
s_ops = tf_utils.get_default_summary_ops()
if mode == 'train':
s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \
arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries(
summarize_ops, summarize_names, mode, to_aggregate=False,
scope_name=scope_name)
s_ops.additional_return_ops += additional_return_ops
s_ops.arop_summary_iters += arop_summary_iters
s_ops.arop_eval_fns += arop_eval_fns
elif mode == 'val':
s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \
arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries(
summarize_ops, summarize_names, mode, to_aggregate=to_aggregate,
scope_name=scope_name)
s_ops.additional_return_ops += additional_return_ops
s_ops.arop_summary_iters += arop_summary_iters
s_ops.arop_eval_fns += arop_eval_fns
elif mode == 'test':
s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \
arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries(
[], [], mode, to_aggregate=[], scope_name=scope_name)
s_ops.additional_return_ops += additional_return_ops
s_ops.arop_summary_iters += arop_summary_iters
s_ops.arop_eval_fns += arop_eval_fns
if mode == 'val':
arop = s_ops.additional_return_ops
arop += [[action_prob_op, input_tensors['train']['action']]]
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['goal_loc'],
input_tensors['step']['gt_dist_to_goal']]]
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['orig_maps'],
input_tensors['common']['goal_loc']]]
s_ops.arop_summary_iters += [-1, arop_full_summary_iters,
arop_full_summary_iters]
s_ops.arop_eval_fns += [eval_ap, eval_dist, plot_trajectories]
elif mode == 'test':
arop = s_ops.additional_return_ops
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['goal_loc'],
input_tensors['step']['gt_dist_to_goal']]]
arop += [[input_tensors['step']['gt_dist_to_goal']]]
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['goal_loc'],
input_tensors['step']['gt_dist_to_goal'],
input_tensors['step']['node_ids'],
input_tensors['step']['perturbs']]]
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['orig_maps'],
input_tensors['common']['goal_loc']]]
s_ops.arop_summary_iters += [-1, -1, -1, arop_full_summary_iters]
s_ops.arop_eval_fns += [eval_dist, save_d_at_t, save_all,
plot_trajectories]
return s_ops
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import sys
import tensorflow as tf
import src.utils as utils
import logging
from tensorflow.contrib import slim
from tensorflow.contrib.metrics.python.ops import confusion_matrix_ops
from tensorflow.contrib.slim import arg_scope
from tensorflow.contrib.slim.nets import resnet_v2
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope
sys.path.insert(0, '../slim')
from preprocessing import inception_preprocessing as ip
resnet_v2_50 = resnet_v2.resnet_v2_50
def custom_residual_block(x, neurons, kernel_size, stride, name, is_training,
wt_decay=0.0001, use_residual=True,
residual_stride_conv=True, conv_fn=slim.conv2d,
batch_norm_param=None):
# batch norm x and relu
init_var = np.sqrt(2.0/(kernel_size**2)/neurons)
with arg_scope([conv_fn],
weights_regularizer=slim.l2_regularizer(wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var),
biases_initializer=tf.zeros_initializer()):
if batch_norm_param is None:
batch_norm_param = {'center': True, 'scale': False,
'activation_fn':tf.nn.relu,
'is_training': is_training}
y = slim.batch_norm(x, scope=name+'_bn', **batch_norm_param)
y = conv_fn(y, num_outputs=neurons, kernel_size=kernel_size, stride=stride,
activation_fn=None, scope=name+'_1',
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_param)
y = conv_fn(y, num_outputs=neurons, kernel_size=kernel_size,
stride=1, activation_fn=None, scope=name+'_2')
if use_residual:
if stride != 1 or x.get_shape().as_list()[-1] != neurons:
batch_norm_param_ = dict(batch_norm_param)
batch_norm_param_['activation_fn'] = None
x = conv_fn(x, num_outputs=neurons, kernel_size=1,
stride=stride if residual_stride_conv else 1,
activation_fn=None, scope=name+'_0_1x1',
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_param_)
if not residual_stride_conv:
x = slim.avg_pool2d(x, 1, stride=stride, scope=name+'_0_avg')
y = tf.add(x, y, name=name+'_add')
return y
def step_gt_prob(step, step_number_op):
# Change the sampling probability from 1 to -1 after step steps.
with tf.name_scope('step_gt_prob'):
out = tf.cond(tf.less(step_number_op, step),
lambda: tf.constant(1.), lambda: tf.constant(-1.))
return out
def inverse_sigmoid_decay(k, global_step_op):
with tf.name_scope('inverse_sigmoid_decay'):
k = tf.constant(k, dtype=tf.float32)
tmp = k*tf.exp(-tf.cast(global_step_op, tf.float32)/k)
tmp = tmp / (1. + tmp)
return tmp
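# Illustrative numpy evaluation (invented k) of the schedule above: the
# probability of sampling the ground-truth action starts near 1 and decays
# towards 0, with k controlling how late the decay kicks in.
import numpy as np
k = 100.
steps = np.array([0., 100., 500., 1000.])
tmp = k * np.exp(-steps / k)
prob = tmp / (1. + tmp)
# prob is approximately [0.990, 0.974, 0.403, 0.005].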
def dense_resample(im, flow_im, output_valid_mask, name='dense_resample'):
""" Resample reward at particular locations.
Args:
im: ...xHxWxC matrix to sample from.
flow_im: ...xHxWx2 matrix, samples the image using absolute offsets as given
by the flow_im.
"""
with tf.name_scope(name):
valid_mask = None
x, y = tf.unstack(flow_im, axis=-1)
x = tf.cast(tf.reshape(x, [-1]), tf.float32)
y = tf.cast(tf.reshape(y, [-1]), tf.float32)
# constants
shape = tf.unstack(tf.shape(im))
channels = shape[-1]
width = shape[-2]
height = shape[-3]
num_batch = tf.cast(tf.reduce_prod(tf.stack(shape[:-3])), 'int32')
zero = tf.constant(0, dtype=tf.int32)
# Round up and down.
x0 = tf.cast(tf.floor(x), 'int32'); x1 = x0 + 1;
y0 = tf.cast(tf.floor(y), 'int32'); y1 = y0 + 1;
if output_valid_mask:
valid_mask = tf.logical_and(
tf.logical_and(tf.less_equal(x, tf.cast(width, tf.float32)-1.), tf.greater_equal(x, 0.)),
tf.logical_and(tf.less_equal(y, tf.cast(height, tf.float32)-1.), tf.greater_equal(y, 0.)))
valid_mask = tf.reshape(valid_mask, shape=shape[:-1] + [1])
x0 = tf.clip_by_value(x0, zero, width-1)
x1 = tf.clip_by_value(x1, zero, width-1)
y0 = tf.clip_by_value(y0, zero, height-1)
y1 = tf.clip_by_value(y1, zero, height-1)
dim2 = width; dim1 = width * height;
# Create base index
base = tf.reshape(tf.range(num_batch) * dim1, shape=[-1,1])
base = tf.reshape(tf.tile(base, [1, height*width]), shape=[-1])
base_y0 = base + y0 * dim2
base_y1 = base + y1 * dim2
idx_a = base_y0 + x0
idx_b = base_y1 + x0
idx_c = base_y0 + x1
idx_d = base_y1 + x1
# use indices to lookup pixels in the flat image and restore channels dim
sh = tf.stack([tf.constant(-1,dtype=tf.int32), channels])
im_flat = tf.cast(tf.reshape(im, sh), dtype=tf.float32)
pixel_a = tf.gather(im_flat, idx_a)
pixel_b = tf.gather(im_flat, idx_b)
pixel_c = tf.gather(im_flat, idx_c)
pixel_d = tf.gather(im_flat, idx_d)
# and finally calculate interpolated values
x1_f = tf.to_float(x1)
y1_f = tf.to_float(y1)
wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1)
wb = tf.expand_dims((x1_f - x) * (1.0 - (y1_f - y)), 1)
wc = tf.expand_dims(((1.0 - (x1_f - x)) * (y1_f - y)), 1)
wd = tf.expand_dims(((1.0 - (x1_f - x)) * (1.0 - (y1_f - y))), 1)
output = tf.add_n([wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d])
output = tf.reshape(output, shape=tf.shape(im))
return output, valid_mask
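# Minimal numpy sketch of the bilinear weights used above: a sample at a
# fractional location (x, y) mixes its four integer neighbours with weights
# that sum to 1. The location is invented.
import numpy as np
x, y = 1.25, 2.6
x0 = np.floor(x); x1 = x0 + 1
y0 = np.floor(y); y1 = y0 + 1
wa = (x1 - x) * (y1 - y)                # Weight for pixel (x0, y0).
wb = (x1 - x) * (1. - (y1 - y))         # Weight for pixel (x0, y1).
wc = (1. - (x1 - x)) * (y1 - y)         # Weight for pixel (x1, y0).
wd = (1. - (x1 - x)) * (1. - (y1 - y))  # Weight for pixel (x1, y1).
assert abs(wa + wb + wc + wd - 1.) < 1e-6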
def get_flow(t, theta, map_size, name_scope='gen_flow'):
"""
Rotates the map by theta and translates the rotated map by t.
Assume that the robot rotates by an angle theta and then moves forward by
translation t. This function returns the flow field field. For every pixel in
the new image it tells us which pixel in the original image it came from:
NewI(x, y) = OldI(flow_x(x,y), flow_y(x,y)).
Assume there is a point p in the original image. Robot rotates by R and moves
forward by t. p1 = Rt*p; p2 = p1 - t; (the world moves in opposite direction.
So, p2 = Rt*p - t, thus p2 came from R*(p2+t), which is what this function
calculates.
t: ... x 2 (translation for B batches of N motions each).
theta: ... x 1 (rotation for B batches of N motions each).
Output: ... x map_size x map_size x 2
"""
with tf.name_scope(name_scope):
tx, ty = tf.unstack(tf.reshape(t, shape=[-1, 1, 1, 1, 2]), axis=4)
theta = tf.reshape(theta, shape=[-1, 1, 1, 1])
c = tf.constant((map_size-1.)/2., dtype=tf.float32)
x, y = np.meshgrid(np.arange(map_size), np.arange(map_size))
x = tf.constant(x[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='x',
shape=[1, map_size, map_size, 1])
y = tf.constant(y[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='y',
shape=[1,map_size, map_size, 1])
x = x-(-tx+c)
y = y-(-ty+c)
sin_theta = tf.sin(theta)
cos_theta = tf.cos(theta)
xr = cos_theta*x - sin_theta*y
yr = sin_theta*x + cos_theta*y
xr = xr + c
yr = yr + c
flow = tf.stack([xr, yr], axis=-1)
sh = tf.unstack(tf.shape(t), axis=0)
sh = tf.stack(sh[:-1]+[tf.constant(_, dtype=tf.int32) for _ in [map_size, map_size, 2]])
flow = tf.reshape(flow, shape=sh)
return flow
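# Hedged numpy sketch of the flow arithmetic above for a single output pixel
# (all values invented): undo the translation, rotate about the map centre,
# and read off the source coordinates in the original map.
import numpy as np
map_size = 5; tx, ty, theta = 1., 0., np.pi / 2
c = (map_size - 1.) / 2.
x, y = 3., 2.                  # Pixel in the new (post-motion) map.
xs = x - (-tx + c); ys = y - (-ty + c)
xr = np.cos(theta) * xs - np.sin(theta) * ys
yr = np.sin(theta) * xs + np.cos(theta) * ys
src = (xr + c, yr + c)         # Pixel of the original map to sample from.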
def distort_image(im, fast_mode=False):
# All images in the same batch are transformed the same way, but over
# iterations you see different distortions.
# im should be float with values between 0 and 1.
im_ = tf.reshape(im, shape=(-1,1,3))
im_ = ip.apply_with_random_selector(
im_, lambda x, ordering: ip.distort_color(x, ordering, fast_mode),
num_cases=4)
im_ = tf.reshape(im_, tf.shape(im))
return im_
def fc_network(x, neurons, wt_decay, name, num_pred=None, offset=0,
batch_norm_param=None, dropout_ratio=0.0, is_training=None):
if dropout_ratio > 0:
assert(is_training is not None), \
'is_training needs to be defined when training with dropout.'
repr = []
for i, neuron in enumerate(neurons):
init_var = np.sqrt(2.0/neuron)
if batch_norm_param is not None:
x = slim.fully_connected(x, neuron, activation_fn=None,
weights_initializer=tf.random_normal_initializer(stddev=init_var),
weights_regularizer=slim.l2_regularizer(wt_decay),
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_param,
biases_initializer=tf.zeros_initializer(),
scope='{:s}_{:d}'.format(name, offset+i))
else:
x = slim.fully_connected(x, neuron, activation_fn=tf.nn.relu,
weights_initializer=tf.random_normal_initializer(stddev=init_var),
weights_regularizer=slim.l2_regularizer(wt_decay),
biases_initializer=tf.zeros_initializer(),
scope='{:s}_{:d}'.format(name, offset+i))
if dropout_ratio > 0:
x = slim.dropout(x, keep_prob=1-dropout_ratio, is_training=is_training,
scope='{:s}_{:d}'.format('dropout_'+name, offset+i))
repr.append(x)
if num_pred is not None:
init_var = np.sqrt(2.0/num_pred)
x = slim.fully_connected(x, num_pred,
weights_regularizer=slim.l2_regularizer(wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var),
biases_initializer=tf.zeros_initializer(),
activation_fn=None,
scope='{:s}_pred'.format(name))
return x, repr
def concat_state_x_list(f, names):
af = {}
for i, k in enumerate(names):
af[k] = np.concatenate([x[i] for x in f], axis=1)
return af
def concat_state_x(f, names):
af = {}
for k in names:
af[k] = np.concatenate([x[k] for x in f], axis=1)
# af[k] = np.swapaxes(af[k], 0, 1)
return af
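# Small invented example: per-step feature dicts are joined along the time
# axis (axis=1), turning T single-step dicts into one dict of T-step tensors.
import numpy as np
steps = [{'loc': np.zeros((4, 1, 2))} for _ in range(3)]
joined = concat_state_x(steps, ['loc'])
# joined['loc'].shape == (4, 3, 2).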
def sample_action(rng, action_probs, optimal_action, sample_gt_prob,
type='sample', combine_type='one_or_other'):
optimal_action_ = optimal_action/np.sum(optimal_action+0., 1, keepdims=True)
action_probs_ = action_probs/np.sum(action_probs+0.001, 1, keepdims=True)
batch_size = action_probs_.shape[0]
action = np.zeros((batch_size), dtype=np.int32)
action_sample_wt = np.zeros((batch_size), dtype=np.float32)
if combine_type == 'add':
sample_gt_prob_ = np.minimum(np.maximum(sample_gt_prob, 0.), 1.)
for i in range(batch_size):
if combine_type == 'one_or_other':
sample_gt = rng.rand() < sample_gt_prob
if sample_gt: distr_ = optimal_action_[i,:]*1.
else: distr_ = action_probs_[i,:]*1.
elif combine_type == 'add':
distr_ = optimal_action_[i,:]*sample_gt_prob_ + \
(1.-sample_gt_prob_)*action_probs_[i,:]
distr_ = distr_ / np.sum(distr_)
if type == 'sample':
action[i] = np.argmax(rng.multinomial(1, distr_, size=1))
elif type == 'argmax':
action[i] = np.argmax(distr_)
action_sample_wt[i] = action_probs_[i, action[i]] / distr_[action[i]]
return action, action_sample_wt
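# Usage sketch with invented numbers: mix the expert's one-hot action with
# the policy's probabilities, draw an action, and keep the importance weight
# that corrects for sampling from the mixture instead of the policy.
import numpy as np
rng = np.random.RandomState(0)
action_probs = np.array([[0.7, 0.2, 0.1]], dtype=np.float32)
optimal_action = np.array([[0., 1., 0.]], dtype=np.float32)
action, wt = sample_action(rng, action_probs, optimal_action,
                           sample_gt_prob=0.5, type='sample',
                           combine_type='add')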
def train_step_custom_online_sampling(sess, train_op, global_step,
train_step_kwargs, mode='train'):
m = train_step_kwargs['m']
obj = train_step_kwargs['obj']
rng_data = train_step_kwargs['rng_data']
rng_action = train_step_kwargs['rng_action']
writer = train_step_kwargs['writer']
iters = train_step_kwargs['iters']
num_steps = train_step_kwargs['num_steps']
logdir = train_step_kwargs['logdir']
dagger_sample_bn_false = train_step_kwargs['dagger_sample_bn_false']
train_display_interval = train_step_kwargs['train_display_interval']
if 'outputs' not in m.train_ops:
m.train_ops['outputs'] = []
s_ops = m.summary_ops[mode]
val_additional_ops = []
# Print all variables here.
if False:
v = tf.get_collection(tf.GraphKeys.VARIABLES)
v_op = [_.value() for _ in v]
v_op_value = sess.run(v_op)
filter = lambda x, y: 'Adam' in x.name
# filter = lambda x, y: np.is_any_nan(y)
ind = [i for i, (_, __) in enumerate(zip(v, v_op_value)) if filter(_, __)]
v = [v[i] for i in ind]
v_op_value = [v_op_value[i] for i in ind]
for i in range(len(v)):
logging.info('XXXX: variable: %30s, is_any_nan: %5s, norm: %f.',
v[i].name, np.any(np.isnan(v_op_value[i])),
np.linalg.norm(v_op_value[i]))
tt = utils.Timer()
for i in range(iters):
tt.tic()
# Sample a room.
e = obj.sample_env(rng_data)
# Initialize the agent.
init_env_state = e.reset(rng_data)
# Get and process the common data.
input = e.get_common_data()
input = e.pre_common_data(input)
feed_dict = prepare_feed_dict(m.input_tensors['common'], input)
if dagger_sample_bn_false:
feed_dict[m.train_ops['batch_norm_is_training_op']] = False
common_data = sess.run(m.train_ops['common'], feed_dict=feed_dict)
states = []
state_features = []
state_targets = []
net_state_to_input = []
step_data_cache = []
executed_actions = []
rewards = []
action_sample_wts = []
states.append(init_env_state)
net_state = sess.run(m.train_ops['init_state'], feed_dict=feed_dict)
net_state = dict(zip(m.train_ops['state_names'], net_state))
net_state_to_input.append(net_state)
for j in range(num_steps):
f = e.get_features(states[j], j)
f = e.pre_features(f)
f.update(net_state)
f['step_number'] = np.ones((1,1,1), dtype=np.int32)*j
state_features.append(f)
feed_dict = prepare_feed_dict(m.input_tensors['step'], state_features[-1])
optimal_action = e.get_optimal_action(states[j], j)
for x, v in zip(m.train_ops['common'], common_data):
feed_dict[x] = v
if dagger_sample_bn_false:
feed_dict[m.train_ops['batch_norm_is_training_op']] = False
outs = sess.run([m.train_ops['step'], m.sample_gt_prob_op,
m.train_ops['step_data_cache'],
m.train_ops['updated_state'],
m.train_ops['outputs']], feed_dict=feed_dict)
action_probs = outs[0]
sample_gt_prob = outs[1]
step_data_cache.append(dict(zip(m.train_ops['step_data_cache'], outs[2])))
net_state = outs[3]
if hasattr(e, 'update_state'):
outputs = outs[4]
outputs = dict(zip(m.train_ops['output_names'], outputs))
e.update_state(outputs, j)
state_targets.append(e.get_targets(states[j], j))
if j < num_steps-1:
# Sample from action_probs and optimal action.
action, action_sample_wt = sample_action(
rng_action, action_probs, optimal_action, sample_gt_prob,
m.sample_action_type, m.sample_action_combine_type)
next_state, reward = e.take_action(states[j], action, j)
executed_actions.append(action)
states.append(next_state)
rewards.append(reward)
action_sample_wts.append(action_sample_wt)
net_state = dict(zip(m.train_ops['state_names'], net_state))
net_state_to_input.append(net_state)
# Concatenate things together for training.
rewards = np.array(rewards).T
action_sample_wts = np.array(action_sample_wts).T
executed_actions = np.array(executed_actions).T
all_state_targets = concat_state_x(state_targets, e.get_targets_name())
all_state_features = concat_state_x(state_features,
e.get_features_name()+['step_number'])
# all_state_net = concat_state_x(net_state_to_input,
# m.train_ops['state_names'])
all_step_data_cache = concat_state_x(step_data_cache,
m.train_ops['step_data_cache'])
dict_train = dict(input)
dict_train.update(all_state_features)
dict_train.update(all_state_targets)
# dict_train.update(all_state_net)
dict_train.update(net_state_to_input[0])
dict_train.update(all_step_data_cache)
dict_train.update({'rewards': rewards,
'action_sample_wts': action_sample_wts,
'executed_actions': executed_actions})
feed_dict = prepare_feed_dict(m.input_tensors['train'], dict_train)
for x in m.train_ops['step_data_cache']:
feed_dict[x] = all_step_data_cache[x]
if mode == 'train':
n_step = sess.run(global_step)
if np.mod(n_step, train_display_interval) == 0:
total_loss, np_global_step, summary, print_summary = sess.run(
[train_op, global_step, s_ops.summary_ops, s_ops.print_summary_ops],
feed_dict=feed_dict)
logging.error("")
else:
total_loss, np_global_step, summary = sess.run(
[train_op, global_step, s_ops.summary_ops], feed_dict=feed_dict)
if writer is not None and summary is not None:
writer.add_summary(summary, np_global_step)
should_stop = sess.run(m.should_stop_op)
if mode != 'train':
arop = [[] for j in range(len(s_ops.additional_return_ops))]
for j in range(len(s_ops.additional_return_ops)):
if s_ops.arop_summary_iters[j] < 0 or i < s_ops.arop_summary_iters[j]:
arop[j] = s_ops.additional_return_ops[j]
val = sess.run(arop, feed_dict=feed_dict)
val_additional_ops.append(val)
tt.toc(log_at=60, log_str='val timer {:d} / {:d}: '.format(i, iters),
type='time')
if mode != 'train':
# Write the default val summaries.
summary, print_summary, np_global_step = sess.run(
[s_ops.summary_ops, s_ops.print_summary_ops, global_step])
if writer is not None and summary is not None:
writer.add_summary(summary, np_global_step)
# write custom validation ops
val_summarys = []
val_additional_ops = zip(*val_additional_ops)
if len(s_ops.arop_eval_fns) > 0:
val_metric_summary = tf.summary.Summary()
for i in range(len(s_ops.arop_eval_fns)):
val_summary = None
if s_ops.arop_eval_fns[i] is not None:
val_summary = s_ops.arop_eval_fns[i](val_additional_ops[i],
np_global_step, logdir,
val_metric_summary,
s_ops.arop_summary_iters[i])
val_summarys.append(val_summary)
if writer is not None:
writer.add_summary(val_metric_summary, np_global_step)
# Return the additional val_ops
total_loss = (val_additional_ops, val_summarys)
should_stop = None
return total_loss, should_stop
def train_step_custom_v2(sess, train_op, global_step, train_step_kwargs,
mode='train'):
m = train_step_kwargs['m']
obj = train_step_kwargs['obj']
rng = train_step_kwargs['rng']
writer = train_step_kwargs['writer']
iters = train_step_kwargs['iters']
logdir = train_step_kwargs['logdir']
train_display_interval = train_step_kwargs['train_display_interval']
s_ops = m.summary_ops[mode]
val_additional_ops = []
# Print all variables here.
if False:
v = tf.get_collection(tf.GraphKeys.VARIABLES)
v_op = [_.value() for _ in v]
v_op_value = sess.run(v_op)
filter = lambda x, y: 'Adam' in x.name
# filter = lambda x, y: np.is_any_nan(y)
ind = [i for i, (_, __) in enumerate(zip(v, v_op_value)) if filter(_, __)]
v = [v[i] for i in ind]
v_op_value = [v_op_value[i] for i in ind]
for i in range(len(v)):
logging.info('XXXX: variable: %30s, is_any_nan: %5s, norm: %f.',
v[i].name, np.any(np.isnan(v_op_value[i])),
np.linalg.norm(v_op_value[i]))
tt = utils.Timer()
for i in range(iters):
tt.tic()
e = obj.sample_env(rng)
rngs = e.gen_rng(rng)
input_data = e.gen_data(*rngs)
input_data = e.pre_data(input_data)
feed_dict = prepare_feed_dict(m.input_tensors, input_data)
if mode == 'train':
n_step = sess.run(global_step)
if np.mod(n_step, train_display_interval) == 0:
total_loss, np_global_step, summary, print_summary = sess.run(
[train_op, global_step, s_ops.summary_ops, s_ops.print_summary_ops],
feed_dict=feed_dict)
else:
total_loss, np_global_step, summary = sess.run(
[train_op, global_step, s_ops.summary_ops],
feed_dict=feed_dict)
if writer is not None and summary is not None:
writer.add_summary(summary, np_global_step)
should_stop = sess.run(m.should_stop_op)
if mode != 'train':
arop = [[] for j in range(len(s_ops.additional_return_ops))]
for j in range(len(s_ops.additional_return_ops)):
if s_ops.arop_summary_iters[j] < 0 or i < s_ops.arop_summary_iters[j]:
arop[j] = s_ops.additional_return_ops[j]
val = sess.run(arop, feed_dict=feed_dict)
val_additional_ops.append(val)
tt.toc(log_at=60, log_str='val timer {:d} / {:d}: '.format(i, iters),
type='time')
if mode != 'train':
# Write the default val summaries.
summary, print_summary, np_global_step = sess.run(
[s_ops.summary_ops, s_ops.print_summary_ops, global_step])
if writer is not None and summary is not None:
writer.add_summary(summary, np_global_step)
# write custom validation ops
val_summarys = []
val_additional_ops = zip(*val_additional_ops)
if len(s_ops.arop_eval_fns) > 0:
val_metric_summary = tf.summary.Summary()
for i in range(len(s_ops.arop_eval_fns)):
val_summary = None
if s_ops.arop_eval_fns[i] is not None:
val_summary = s_ops.arop_eval_fns[i](val_additional_ops[i],
np_global_step, logdir,
val_metric_summary,
s_ops.arop_summary_iters[i])
val_summarys.append(val_summary)
if writer is not None:
writer.add_summary(val_metric_summary, np_global_step)
# Return the additional val_ops
total_loss = (val_additional_ops, val_summarys)
should_stop = None
return total_loss, should_stop
def train_step_custom(sess, train_op, global_step, train_step_kwargs,
mode='train'):
m = train_step_kwargs['m']
params = train_step_kwargs['params']
rng = train_step_kwargs['rng']
writer = train_step_kwargs['writer']
iters = train_step_kwargs['iters']
gen_rng = train_step_kwargs['gen_rng']
logdir = train_step_kwargs['logdir']
gen_data = train_step_kwargs['gen_data']
pre_data = train_step_kwargs['pre_data']
train_display_interval = train_step_kwargs['train_display_interval']
val_additional_ops = []
# Print all variables here.
if False:
v = tf.get_collection(tf.GraphKeys.VARIABLES)
for _ in v:
val = sess.run(_.value())
logging.info('variable: %30s, is_any_nan: %5s, norm: %f.', _.name,
np.any(np.isnan(val)), np.linalg.norm(val))
for i in range(iters):
rngs = gen_rng(params, rng)
input_data = gen_data(params, *rngs)
input_data = pre_data(params, input_data)
feed_dict = prepare_feed_dict(m.input_tensors, input_data)
if mode == 'train':
n_step = sess.run(global_step)
if np.mod(n_step, train_display_interval) == 0:
total_loss, np_global_step, summary, print_summary = sess.run(
[train_op, global_step, m.summary_op[mode], m.print_summary_op[mode]],
feed_dict=feed_dict)
else:
total_loss, np_global_step, summary = sess.run(
[train_op, global_step, m.summary_op[mode]],
feed_dict=feed_dict)
if writer is not None:
writer.add_summary(summary, np_global_step)
should_stop = sess.run(m.should_stop_op)
if mode == 'val':
val = sess.run(m.agg_update_op[mode] + m.additional_return_op[mode],
feed_dict=feed_dict)
val_additional_ops.append(val[len(m.agg_update_op[mode]):])
if mode == 'val':
summary, print_summary, np_global_step = sess.run(
[m.summary_op[mode], m.print_summary_op[mode], global_step])
if writer is not None:
writer.add_summary(summary, np_global_step)
sess.run([m.agg_reset_op[mode]])
# write custom validation ops
if m.eval_metrics_fn[mode] is not None:
val_metric_summary = m.eval_metrics_fn[mode](val_additional_ops,
np_global_step, logdir)
if writer is not None:
writer.add_summary(val_metric_summary, np_global_step)
total_loss = val_additional_ops
should_stop = None
return total_loss, should_stop
def setup_training(loss_op, initial_learning_rate, steps_per_decay,
learning_rate_decay, momentum, max_steps,
sync=False, adjust_lr_sync=True,
num_workers=1, replica_id=0, vars_to_optimize=None,
clip_gradient_norm=0, typ=None, momentum2=0.999,
adam_eps=1e-8):
if sync and adjust_lr_sync:
initial_learning_rate = initial_learning_rate * num_workers
max_steps = np.int(max_steps / num_workers)
steps_per_decay = np.int(steps_per_decay / num_workers)
global_step_op = slim.get_or_create_global_step()
lr_op = tf.train.exponential_decay(initial_learning_rate,
global_step_op, steps_per_decay, learning_rate_decay, staircase=True)
if typ == 'sgd':
optimizer = tf.train.MomentumOptimizer(lr_op, momentum)
  elif typ == 'adam':
    optimizer = tf.train.AdamOptimizer(learning_rate=lr_op, beta1=momentum,
                                       beta2=momentum2, epsilon=adam_eps)
  else:
    logging.fatal('Undefined optimizer typ: %s', typ)
if sync:
sync_optimizer = tf.train.SyncReplicasOptimizer(optimizer,
replicas_to_aggregate=num_workers,
replica_id=replica_id,
total_num_replicas=num_workers)
train_op = slim.learning.create_train_op(loss_op, sync_optimizer,
variables_to_train=vars_to_optimize,
clip_gradient_norm=clip_gradient_norm)
else:
sync_optimizer = None
train_op = slim.learning.create_train_op(loss_op, optimizer,
variables_to_train=vars_to_optimize,
clip_gradient_norm=clip_gradient_norm)
should_stop_op = tf.greater_equal(global_step_op, max_steps)
return lr_op, global_step_op, train_op, should_stop_op, optimizer, sync_optimizer
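# Example usage of setup_training (hypothetical hyper-parameters; a sketch,
# not a prescribed configuration), assuming loss_op is a scalar loss tensor:
#   lr_op, global_step_op, train_op, should_stop_op, opt, sync_opt = \
#       setup_training(loss_op, initial_learning_rate=1e-3,
#                      steps_per_decay=10000, learning_rate_decay=0.1,
#                      momentum=0.9, max_steps=100000, typ='adam')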
def add_value_to_summary(metric_summary, tag, val, log=True, tag_str=None):
"""Adds a scalar summary to the summary object. Optionally also logs to
logging."""
new_value = metric_summary.value.add();
new_value.tag = tag
new_value.simple_value = val
if log:
if tag_str is None:
tag_str = tag + '%f'
logging.info(tag_str, val)
def add_scalar_summary_op(tensor, name=None,
summary_key='summaries', print_summary_key='print_summaries', prefix=''):
collections = []
op = tf.summary.scalar(name, tensor, collections=collections)
if summary_key != print_summary_key:
tf.add_to_collection(summary_key, op)
op = tf.Print(op, [tensor], ' {:-<25s}: '.format(name) + prefix)
tf.add_to_collection(print_summary_key, op)
return op
def setup_inputs(inputs):
input_tensors = {}
input_shapes = {}
for (name, typ, sz) in inputs:
_ = tf.placeholder(typ, shape=sz, name=name)
input_tensors[name] = _
input_shapes[name] = sz
return input_tensors, input_shapes
def prepare_feed_dict(input_tensors, inputs):
feed_dict = {}
for n in input_tensors.keys():
feed_dict[input_tensors[n]] = inputs[n].astype(input_tensors[n].dtype.as_numpy_dtype)
return feed_dict
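# Example pairing setup_inputs with prepare_feed_dict (hypothetical names and
# shapes, for illustration only):
#   input_tensors, input_shapes = setup_inputs(
#       [('imgs', tf.float32, (4, 224, 224, 3)), ('labels', tf.int32, (4,))])
#   feed_dict = prepare_feed_dict(
#       input_tensors, {'imgs': np.zeros((4, 224, 224, 3)),
#                       'labels': np.zeros((4,))})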
def simple_add_summaries(summarize_ops, summarize_names,
summary_key='summaries',
print_summary_key='print_summaries', prefix=''):
  for op, name in zip(summarize_ops, summarize_names):
add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix)
summary_op = tf.summary.merge_all(summary_key)
print_summary_op = tf.summary.merge_all(print_summary_key)
return summary_op, print_summary_op
def add_summary_ops(m, summarize_ops, summarize_names, to_aggregate=None,
summary_key='summaries',
print_summary_key='print_summaries', prefix=''):
  if not isinstance(to_aggregate, list):
to_aggregate = [to_aggregate for _ in summarize_ops]
# set up aggregating metrics
if np.any(to_aggregate):
agg_ops = []
for op, name, to_agg in zip(summarize_ops, summarize_names, to_aggregate):
if to_agg:
# agg_ops.append(slim.metrics.streaming_mean(op, return_reset_op=True))
agg_ops.append(tf.contrib.metrics.streaming_mean(op))
# agg_ops.append(tf.contrib.metrics.streaming_mean(op, return_reset_op=True))
else:
agg_ops.append([None, None, None])
# agg_values_op, agg_update_op, agg_reset_op = zip(*agg_ops)
# agg_update_op = [x for x in agg_update_op if x is not None]
# agg_reset_op = [x for x in agg_reset_op if x is not None]
agg_values_op, agg_update_op = zip(*agg_ops)
agg_update_op = [x for x in agg_update_op if x is not None]
agg_reset_op = [tf.no_op()]
else:
agg_values_op = [None for _ in to_aggregate]
agg_update_op = [tf.no_op()]
agg_reset_op = [tf.no_op()]
for op, name, to_agg, agg_op in zip(summarize_ops, summarize_names, to_aggregate, agg_values_op):
if to_agg:
add_scalar_summary_op(agg_op, name, summary_key, print_summary_key, prefix)
else:
add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix)
summary_op = tf.summary.merge_all(summary_key)
print_summary_op = tf.summary.merge_all(print_summary_key)
return summary_op, print_summary_op, agg_update_op, agg_reset_op
def accum_val_ops(outputs, names, global_step, output_dir, metric_summary, N):
"""Processes the collected outputs to compute AP for action prediction.
Args:
outputs : List of scalar ops to summarize.
names : Name of the scalar ops.
global_step : global_step.
output_dir : where to store results.
metric_summary : summary object to add summaries to.
N : number of outputs to process.
"""
outs = []
if N >= 0:
outputs = outputs[:N]
for i in range(len(outputs[0])):
    scalar = np.array([x[i] for x in outputs])
    assert scalar.ndim == 1
add_value_to_summary(metric_summary, names[i], np.mean(scalar),
tag_str='{:>27s}: [{:s}]: %f'.format(names[i], ''))
outs.append(np.mean(scalar))
return outs
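# Example for accum_val_ops (hypothetical values): with
#   outputs = [[0.5, 0.9], [0.7, 0.8]] and names = ['loss', 'acc'],
# it adds mean 'loss' = 0.6 and mean 'acc' = 0.85 to metric_summary and
# returns [0.6, 0.85].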
def get_default_summary_ops():
return utils.Foo(summary_ops=None, print_summary_ops=None,
additional_return_ops=[], arop_summary_iters=[],
arop_eval_fns=[])
def simple_summaries(summarize_ops, summarize_names, mode, to_aggregate=False,
scope_name='summary'):
  if not isinstance(to_aggregate, list):
to_aggregate = [to_aggregate for _ in summarize_ops]
summary_key = '{:s}_summaries'.format(mode)
print_summary_key = '{:s}_print_summaries'.format(mode)
  prefix = ' [{:s}]: '.format(mode)
  # Default ops for things that don't need to be aggregated.
if not np.all(to_aggregate):
for op, name, to_agg in zip(summarize_ops, summarize_names, to_aggregate):
if not to_agg:
add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix)
summary_ops = tf.summary.merge_all(summary_key)
print_summary_ops = tf.summary.merge_all(print_summary_key)
else:
summary_ops = tf.no_op()
print_summary_ops = tf.no_op()
  # Additional ops for things that need to be aggregated.
if np.any(to_aggregate):
    additional_return_ops = [[summarize_ops[i]
                              for i, x in enumerate(to_aggregate) if x]]
arop_summary_iters = [-1]
s_names = ['{:s}/{:s}'.format(scope_name, summarize_names[i])
for i, x in enumerate(to_aggregate) if x]
fn = lambda outputs, global_step, output_dir, metric_summary, N: \
accum_val_ops(outputs, s_names, global_step, output_dir, metric_summary,
N)
arop_eval_fns = [fn]
else:
additional_return_ops = []
arop_summary_iters = []
arop_eval_fns = []
return summary_ops, print_summary_ops, additional_return_ops, \
arop_summary_iters, arop_eval_fns
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim
import logging
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
from src import utils
import src.file_utils as fu
import tfcode.nav_utils as nu
from tfcode import tf_utils
setup_train_step_kwargs = nu.default_train_step_kwargs
compute_losses_multi_or = nu.compute_losses_multi_or
get_repr_from_image = nu.get_repr_from_image
_save_d_at_t = nu.save_d_at_t
_save_all = nu.save_all
_eval_ap = nu.eval_ap
_eval_dist = nu.eval_dist
_plot_trajectories = nu.plot_trajectories
def lstm_online(cell_fn, num_steps, inputs, state, varscope):
  # inputs is B x num_steps x C (C channels).
  # state is the flattened LSTM state, B x 1 x (C1+C2) (state_is_tuple=False).
  # Output state is always B x 1 x (C1+C2).
inputs = tf.unstack(inputs, axis=1, num=num_steps)
state = tf.unstack(state, axis=1, num=1)[0]
outputs = []
if num_steps > 1:
varscope.reuse_variables()
for s in range(num_steps):
output, state = cell_fn(inputs[s], state)
outputs.append(output)
outputs = tf.stack(outputs, axis=1)
state = tf.stack([state], axis=1)
return outputs, state
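# Example for lstm_online (hypothetical sizes): with B = 4, num_steps = 8,
# inputs of shape 4 x 8 x C and a flattened state of shape 4 x 1 x (C1+C2),
# the cell is unrolled 8 times, giving outputs of shape 4 x 8 x C_out and an
# updated state of shape 4 x 1 x (C1+C2).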
def _inputs(problem, lstm_states, lstm_state_dims):
# Set up inputs.
with tf.name_scope('inputs'):
n_views = problem.n_views
inputs = []
inputs.append(('orig_maps', tf.float32,
(problem.batch_size, 1, None, None, 1)))
inputs.append(('goal_loc', tf.float32,
(problem.batch_size, problem.num_goals, 2)))
    # For initializing the LSTM.
inputs.append(('rel_goal_loc_at_start', tf.float32,
(problem.batch_size, problem.num_goals,
problem.rel_goal_loc_dim)))
common_input_data, _ = tf_utils.setup_inputs(inputs)
inputs = []
inputs.append(('imgs', tf.float32, (problem.batch_size, None, n_views,
problem.img_height, problem.img_width,
problem.img_channels)))
# Goal location as a tuple of delta location and delta theta.
inputs.append(('rel_goal_loc', tf.float32, (problem.batch_size, None,
problem.rel_goal_loc_dim)))
if problem.outputs.visit_count:
inputs.append(('visit_count', tf.int32, (problem.batch_size, None, 1)))
inputs.append(('last_visit', tf.int32, (problem.batch_size, None, 1)))
for i, (state, dim) in enumerate(zip(lstm_states, lstm_state_dims)):
inputs.append((state, tf.float32, (problem.batch_size, 1, dim)))
if problem.outputs.egomotion:
inputs.append(('incremental_locs', tf.float32,
(problem.batch_size, None, 2)))
inputs.append(('incremental_thetas', tf.float32,
(problem.batch_size, None, 1)))
inputs.append(('step_number', tf.int32, (1, None, 1)))
inputs.append(('node_ids', tf.int32, (problem.batch_size, None,
problem.node_ids_dim)))
inputs.append(('perturbs', tf.float32, (problem.batch_size, None,
problem.perturbs_dim)))
    # For plotting results.
inputs.append(('loc_on_map', tf.float32, (problem.batch_size, None, 2)))
inputs.append(('gt_dist_to_goal', tf.float32, (problem.batch_size, None, 1)))
step_input_data, _ = tf_utils.setup_inputs(inputs)
inputs = []
inputs.append(('executed_actions', tf.int32, (problem.batch_size, None)))
inputs.append(('rewards', tf.float32, (problem.batch_size, None)))
inputs.append(('action_sample_wts', tf.float32, (problem.batch_size, None)))
inputs.append(('action', tf.int32, (problem.batch_size, None,
problem.num_actions)))
train_data, _ = tf_utils.setup_inputs(inputs)
train_data.update(step_input_data)
train_data.update(common_input_data)
return common_input_data, step_input_data, train_data
def _add_summaries(m, summary_mode, arop_full_summary_iters):
summarize_ops = [m.lr_op, m.global_step_op, m.sample_gt_prob_op,
m.total_loss_op, m.data_loss_op, m.reg_loss_op] + m.acc_ops
summarize_names = ['lr', 'global_step', 'sample_gt_prob_op', 'total_loss',
'data_loss', 'reg_loss'] + \
['acc_{:d}'.format(i) for i in range(len(m.acc_ops))]
to_aggregate = [0, 0, 0, 1, 1, 1] + [1]*len(m.acc_ops)
scope_name = 'summary'
with tf.name_scope(scope_name):
s_ops = nu.add_default_summaries(summary_mode, arop_full_summary_iters,
summarize_ops, summarize_names,
to_aggregate, m.action_prob_op,
m.input_tensors, scope_name=scope_name)
m.summary_ops = {summary_mode: s_ops}
def visit_count_fc(visit_count, last_visit, embed_neurons, wt_decay,
                   fc_dropout, is_training):
with tf.variable_scope('embed_visit_count'):
visit_count = tf.reshape(visit_count, shape=[-1])
last_visit = tf.reshape(last_visit, shape=[-1])
visit_count = tf.clip_by_value(visit_count, clip_value_min=-1,
clip_value_max=15)
last_visit = tf.clip_by_value(last_visit, clip_value_min=-1,
clip_value_max=15)
visit_count = tf.one_hot(visit_count, depth=16, axis=1, dtype=tf.float32,
on_value=10., off_value=0.)
last_visit = tf.one_hot(last_visit, depth=16, axis=1, dtype=tf.float32,
on_value=10., off_value=0.)
f = tf.concat([visit_count, last_visit], 1)
x, _ = tf_utils.fc_network(
f, neurons=embed_neurons, wt_decay=wt_decay, name='visit_count_embed',
offset=0, batch_norm_param=None, dropout_ratio=fc_dropout,
is_training=is_training)
return x
def lstm_setup(name, x, batch_size, is_single_step, lstm_dim, lstm_out,
num_steps, state_input_op):
# returns state_name, state_init_op, updated_state_op, out_op
with tf.name_scope('reshape_'+name):
sh = x.get_shape().as_list()
x = tf.reshape(x, shape=[batch_size, -1, sh[-1]])
with tf.variable_scope(name) as varscope:
cell = tf.contrib.rnn.LSTMCell(
num_units=lstm_dim, forget_bias=1.0, state_is_tuple=False,
num_proj=lstm_out, use_peepholes=True,
initializer=tf.random_uniform_initializer(-0.01, 0.01, seed=0),
cell_clip=None, proj_clip=None)
sh = [batch_size, 1, lstm_dim+lstm_out]
state_init_op = tf.constant(0., dtype=tf.float32, shape=sh)
fn = lambda ns: lstm_online(cell, ns, x, state_input_op, varscope)
    out_op, updated_state_op = tf.cond(is_single_step, lambda: fn(1),
                                       lambda: fn(num_steps))
return name, state_init_op, updated_state_op, out_op
def combine_setup(name, combine_type, embed_img, embed_goal, num_img_neurons=None,
                  num_goal_neurons=None):
with tf.name_scope(name + '_' + combine_type):
    if combine_type == 'add':
      # Add features from the goal and the image.
      out = embed_img + embed_goal
    elif combine_type == 'multiply':
      # Combine via a per-sample matrix-vector product.
      re_embed_img = tf.reshape(
          embed_img, shape=[-1, num_img_neurons // num_goal_neurons,
                            num_goal_neurons])
re_embed_goal = tf.reshape(embed_goal, shape=[-1, num_goal_neurons, 1])
x = tf.matmul(re_embed_img, re_embed_goal, transpose_a=False, transpose_b=False)
out = slim.flatten(x)
elif combine_type == 'none' or combine_type == 'imgonly':
out = embed_img
elif combine_type == 'goalonly':
out = embed_goal
else:
logging.fatal('Undefined combine_type: %s', combine_type)
return out
def preprocess_egomotion(locs, thetas):
with tf.name_scope('pre_ego'):
pre_ego = tf.concat([locs, tf.sin(thetas), tf.cos(thetas)], 2)
sh = pre_ego.get_shape().as_list()
pre_ego = tf.reshape(pre_ego, [-1, sh[-1]])
return pre_ego
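# Example for preprocess_egomotion (hypothetical shapes): locs of shape
# B x T x 2 and thetas of shape B x T x 1 are concatenated into
# [dx, dy, sin(theta), cos(theta)] and reshaped to (B*T) x 4.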
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
# Set up the model.
tf.set_random_seed(args.solver.seed)
task_params = args.navtask.task_params
num_steps = task_params.num_steps
num_goals = task_params.num_goals
num_actions = task_params.num_actions
num_actions_ = num_actions
n_views = task_params.n_views
batch_norm_is_training_op = \
tf.placeholder_with_default(batch_norm_is_training, shape=[],
name='batch_norm_is_training_op')
# Setup the inputs
m.input_tensors = {}
  lstm_states = []
  lstm_state_dims = []
  state_names = []
  updated_state_ops = []
  init_state_ops = []
if args.arch.lstm_output:
lstm_states += ['lstm_output']
lstm_state_dims += [args.arch.lstm_output_dim+task_params.num_actions]
if args.arch.lstm_ego:
lstm_states += ['lstm_ego']
lstm_state_dims += [args.arch.lstm_ego_dim + args.arch.lstm_ego_out]
lstm_states += ['lstm_img']
lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
elif args.arch.lstm_img:
# An LSTM only on the image
lstm_states += ['lstm_img']
lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
else:
    # No LSTMs involved here.
    pass
m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \
_inputs(task_params, lstm_states, lstm_state_dims)
with tf.name_scope('check_size'):
is_single_step = tf.equal(tf.unstack(tf.shape(m.input_tensors['step']['imgs']),
num=6)[1], 1)
images_reshaped = tf.reshape(m.input_tensors['step']['imgs'],
shape=[-1, task_params.img_height, task_params.img_width,
task_params.img_channels], name='re_image')
rel_goal_loc_reshaped = tf.reshape(m.input_tensors['step']['rel_goal_loc'],
shape=[-1, task_params.rel_goal_loc_dim], name='re_rel_goal_loc')
x, vars_ = get_repr_from_image(
images_reshaped, task_params.modalities, task_params.data_augment,
args.arch.encoder, args.solver.freeze_conv, args.solver.wt_decay,
is_training)
# Reshape into nice things so that these can be accumulated over time steps
# for faster backprop.
sh_before = x.get_shape().as_list()
m.encoder_output = tf.reshape(
x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:])
x = tf.reshape(m.encoder_output, shape=[-1] + sh_before[1:])
# Add a layer to reduce dimensions for a fc layer.
if args.arch.dim_reduce_neurons > 0:
    ks = 1
    neurons = args.arch.dim_reduce_neurons
init_var = np.sqrt(2.0/(ks**2)/neurons)
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.conv_feat = slim.conv2d(
x, neurons, kernel_size=ks, stride=1, normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_param, padding='SAME', scope='dim_reduce',
weights_regularizer=slim.l2_regularizer(args.solver.wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var))
reshape_conv_feat = slim.flatten(m.conv_feat)
sh = reshape_conv_feat.get_shape().as_list()
m.reshape_conv_feat = tf.reshape(reshape_conv_feat,
shape=[-1, sh[1]*n_views])
# Restore these from a checkpoint.
if args.solver.pretrained_path is not None:
m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path,
vars_)
else:
m.init_fn = None
# Hit the goal_location with a bunch of fully connected layers, to embed it
# into some space.
with tf.variable_scope('embed_goal'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.embed_goal, _ = tf_utils.fc_network(
rel_goal_loc_reshaped, neurons=args.arch.goal_embed_neurons,
wt_decay=args.solver.wt_decay, name='goal_embed', offset=0,
batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
is_training=is_training)
if args.arch.embed_goal_for_state:
with tf.variable_scope('embed_goal_for_state'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.embed_goal_for_state, _ = tf_utils.fc_network(
m.input_tensors['common']['rel_goal_loc_at_start'][:,0,:],
neurons=args.arch.goal_embed_neurons, wt_decay=args.solver.wt_decay,
name='goal_embed', offset=0, batch_norm_param=batch_norm_param,
dropout_ratio=args.arch.fc_dropout, is_training=is_training)
  # Hit the image features with a bunch of fully connected layers, to embed
  # them into some space.
with tf.variable_scope('embed_img'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.embed_img, _ = tf_utils.fc_network(
m.reshape_conv_feat, neurons=args.arch.img_embed_neurons,
wt_decay=args.solver.wt_decay, name='img_embed', offset=0,
batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
is_training=is_training)
# For lstm_ego, and lstm_image, embed the ego motion, accumulate it into an
# LSTM, combine with image features and accumulate those in an LSTM. Finally
# combine what you get from the image LSTM with the goal to output an action.
if args.arch.lstm_ego:
ego_reshaped = preprocess_egomotion(m.input_tensors['step']['incremental_locs'],
m.input_tensors['step']['incremental_thetas'])
with tf.variable_scope('embed_ego'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.embed_ego, _ = tf_utils.fc_network(
ego_reshaped, neurons=args.arch.ego_embed_neurons,
wt_decay=args.solver.wt_decay, name='ego_embed', offset=0,
batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
is_training=is_training)
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
'lstm_ego', m.embed_ego, task_params.batch_size, is_single_step,
args.arch.lstm_ego_dim, args.arch.lstm_ego_out, num_steps*num_goals,
m.input_tensors['step']['lstm_ego'])
state_names += [state_name]
init_state_ops += [state_init_op]
updated_state_ops += [updated_state_op]
# Combine the output with the vision features.
m.img_ego_op = combine_setup('img_ego', args.arch.combine_type_ego,
m.embed_img, out_op,
args.arch.img_embed_neurons[-1],
args.arch.lstm_ego_out)
# LSTM on these vision features.
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
'lstm_img', m.img_ego_op, task_params.batch_size, is_single_step,
args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals,
m.input_tensors['step']['lstm_img'])
state_names += [state_name]
init_state_ops += [state_init_op]
updated_state_ops += [updated_state_op]
m.img_for_goal = out_op
num_img_for_goal_neurons = args.arch.lstm_img_out
elif args.arch.lstm_img:
# LSTM on just the image features.
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
'lstm_img', m.embed_img, task_params.batch_size, is_single_step,
args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals,
m.input_tensors['step']['lstm_img'])
state_names += [state_name]
init_state_ops += [state_init_op]
updated_state_ops += [updated_state_op]
m.img_for_goal = out_op
num_img_for_goal_neurons = args.arch.lstm_img_out
else:
m.img_for_goal = m.embed_img
num_img_for_goal_neurons = args.arch.img_embed_neurons[-1]
if args.arch.use_visit_count:
m.embed_visit_count = visit_count_fc(
m.input_tensors['step']['visit_count'],
m.input_tensors['step']['last_visit'], args.arch.goal_embed_neurons,
args.solver.wt_decay, args.arch.fc_dropout, is_training=is_training)
m.embed_goal = m.embed_goal + m.embed_visit_count
m.combined_f = combine_setup('img_goal', args.arch.combine_type,
m.img_for_goal, m.embed_goal,
num_img_for_goal_neurons,
args.arch.goal_embed_neurons[-1])
# LSTM on the combined representation.
if args.arch.lstm_output:
name = 'lstm_output'
# A few fully connected layers here.
with tf.variable_scope('action_pred'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
x, _ = tf_utils.fc_network(
m.combined_f, neurons=args.arch.pred_neurons,
wt_decay=args.solver.wt_decay, name='pred', offset=0,
batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout)
if args.arch.lstm_output_init_state_from_goal:
# Use the goal embedding to initialize the LSTM state.
      # UGLY KLUDGY HACK: if this is doing computation for a single time step
# then this will not involve back prop, so we can use the state input from
# the feed dict, otherwise we compute the state representation from the
# goal and feed that in. Necessary for using goal location to generate the
# state representation.
      m.embed_goal_for_state = tf.expand_dims(m.embed_goal_for_state, axis=1)
state_op = tf.cond(is_single_step, lambda: m.input_tensors['step'][name],
lambda: m.embed_goal_for_state)
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
name, x, task_params.batch_size, is_single_step,
args.arch.lstm_output_dim,
num_actions_,
num_steps*num_goals, state_op)
init_state_ops += [m.embed_goal_for_state]
else:
state_op = m.input_tensors['step'][name]
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
name, x, task_params.batch_size, is_single_step,
args.arch.lstm_output_dim,
num_actions_, num_steps*num_goals, state_op)
init_state_ops += [state_init_op]
state_names += [state_name]
updated_state_ops += [updated_state_op]
out_op = tf.reshape(out_op, shape=[-1, num_actions_])
if num_actions_ > num_actions:
m.action_logits_op = out_op[:,:num_actions]
m.baseline_op = out_op[:,num_actions:]
else:
m.action_logits_op = out_op
m.baseline_op = None
m.action_prob_op = tf.nn.softmax(m.action_logits_op)
else:
# A few fully connected layers here.
with tf.variable_scope('action_pred'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
out_op, _ = tf_utils.fc_network(
m.combined_f, neurons=args.arch.pred_neurons,
wt_decay=args.solver.wt_decay, name='pred', offset=0,
num_pred=num_actions_,
batch_norm_param=batch_norm_param,
dropout_ratio=args.arch.fc_dropout, is_training=is_training)
if num_actions_ > num_actions:
m.action_logits_op = out_op[:,:num_actions]
m.baseline_op = out_op[:,num_actions:]
else:
m.action_logits_op = out_op
m.baseline_op = None
m.action_prob_op = tf.nn.softmax(m.action_logits_op)
m.train_ops = {}
m.train_ops['step'] = m.action_prob_op
m.train_ops['common'] = [m.input_tensors['common']['orig_maps'],
m.input_tensors['common']['goal_loc'],
m.input_tensors['common']['rel_goal_loc_at_start']]
m.train_ops['state_names'] = state_names
m.train_ops['init_state'] = init_state_ops
m.train_ops['updated_state'] = updated_state_ops
m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op
  # Flat list of ops which cache the step data.
  if args.solver.freeze_conv:
    m.train_ops['step_data_cache'] = [m.encoder_output]
  else:
    m.train_ops['step_data_cache'] = []
ewma_decay = 0.99 if is_training else 0.0
weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32,
name='weight')
m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
compute_losses_multi_or(
m.action_logits_op, m.input_tensors['train']['action'],
weights=weight, num_actions=num_actions,
data_loss_wt=args.solver.data_loss_wt,
reg_loss_wt=args.solver.reg_loss_wt, ewma_decay=ewma_decay)
if args.solver.freeze_conv:
vars_to_optimize = list(set(tf.trainable_variables()) - set(vars_))
else:
vars_to_optimize = None
m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
m.sync_optimizer = tf_utils.setup_training(
m.total_loss_op,
args.solver.initial_learning_rate,
args.solver.steps_per_decay,
args.solver.learning_rate_decay,
args.solver.momentum,
args.solver.max_steps,
args.solver.sync,
args.solver.adjust_lr_sync,
args.solver.num_workers,
args.solver.task,
vars_to_optimize=vars_to_optimize,
clip_gradient_norm=args.solver.clip_gradient_norm,
typ=args.solver.typ, momentum2=args.solver.momentum2,
adam_eps=args.solver.adam_eps)
if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k,
m.global_step_op)
elif args.arch.sample_gt_prob_type == 'zero':
m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)
elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
step = int(args.arch.sample_gt_prob_type.split('_')[1])
m.sample_gt_prob_op = tf_utils.step_gt_prob(
step, m.input_tensors['step']['step_number'][0,0,0])
m.sample_action_type = args.arch.action_sample_type
m.sample_action_combine_type = args.arch.action_sample_combine_type
_add_summaries(m, summary_mode, args.summary.arop_full_summary_iters)
m.init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
write_version=tf.train.SaverDef.V2)
return m
# Compression with Neural Networks
This is a [TensorFlow](http://www.tensorflow.org/) model repo containing
research on compression with neural networks. This repo currently contains
code for the following papers:
[Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148)
## Organization
[Image Encoder](image_encoder/): Encoding and decoding images into their binary representation.
[Entropy Coder](entropy_coder/): Lossless compression of the binary representation.
# Image Compression with Neural Networks
This is a [TensorFlow](http://www.tensorflow.org/) model for compressing and
decompressing images using an already trained Residual GRU model as described
in [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148).
Please consult the paper for more details on the architecture and compression
results.
This code allows you to perform lossy compression with a model already
trained on compression. It does not currently contain the entropy coding
portions of our paper.
## Prerequisites
The only software requirement for running the encoder and decoder is having
TensorFlow installed. You will also need to
[download](http://download.tensorflow.org/models/compression_residual_gru-2016-08-23.tar.gz)
and extract the model residual_gru.pb.
If you want to generate the perceptual similarity under MS-SSIM, you will also
need to [install SciPy](https://www.scipy.org/install.html).
## Encoding
The Residual GRU network is fully convolutional, but requires the image's
height and width in pixels to be multiples of 32. If an image is needed for
testing, this folder contains example.png, which is 768x1024. We also rely on
TensorFlow's built-in decoding ops, which supported only PNG and JPEG at the
time of release.
To encode an image, simply run the following command:
`python encoder.py --input_image=/your/image/here.png
--output_codes=output_codes.npz --iteration=15
--model=/path/to/model/residual_gru.pb
`
The iteration parameter specifies the lossy quality to target for compression.
The quality can be [0-15], where 0 corresponds to a target of 1/8 bits per
pixel (bpp) and every increment adds an additional 1/8 bpp.
| Iteration | BPP | Compression Ratio |
|---: |---: |---: |
|0 | 0.125 | 192:1|
|1 | 0.250 | 96:1|
|2 | 0.375 | 64:1|
|3 | 0.500 | 48:1|
|4 | 0.625 | 38.4:1|
|5 | 0.750 | 32:1|
|6 | 0.875 | 27.4:1|
|7 | 1.000 | 24:1|
|8 | 1.125 | 21.3:1|
|9 | 1.250 | 19.2:1|
|10 | 1.375 | 17.4:1|
|11 | 1.500 | 16:1|
|12 | 1.625 | 14.7:1|
|13 | 1.750 | 13.7:1|
|14 | 1.875 | 12.8:1|
|15 | 2.000 | 12:1|
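The table follows directly from the iteration index; a minimal sketch of the
arithmetic, assuming 24-bit RGB input (so the ratio is 24/bpp):

```python
def rate_for_iteration(iteration):
  # Each iteration adds 1/8 bpp; 24 bpp is the uncompressed RGB rate.
  bpp = (iteration + 1) / 8.0
  return bpp, 24.0 / bpp

for it in (0, 7, 15):
  bpp, ratio = rate_for_iteration(it)
  print('iteration {:2d}: {:.3f} bpp, {:.1f}:1'.format(it, bpp, ratio))
```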
The output_codes file contains the numpy shape and a flattened, bit-packed
array of the codes. These can be inspected in Python using numpy.load().
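For a quick look, the archive can be opened with numpy (a sketch; the key
names inside the file depend on the encoder, so they are listed rather than
assumed):

```python
import numpy as np

codes = np.load('output_codes.npz')
for key in codes.files:
  print(key, codes[key].shape, codes[key].dtype)
```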
## Decoding
After generating codes for an image, the lossy reconstructions of that image
can be generated as follows:
`python decoder.py --input_codes=codes.npz --output_directory=/tmp/decoded/
--model=residual_gru.pb`
The output_directory will contain images decoded at each quality level.
## Comparing Similarity
One of our primary metrics for comparing how similar two images are
is MS-SSIM.
To generate these metrics on your images you can run:
`python msssim.py --original_image=/path/to/your/image.png
--compared_image=/tmp/decoded/image_15.png`
## Results
CSV results containing the post-entropy bitrates and MS-SSIM over the Kodak
dataset are available for reference. Each row of the CSV corresponds to a
Kodak image (1-24), and each column to an iteration of the model (1-16).
[Post Entropy Bitrates](https://storage.googleapis.com/compression-ml/residual_gru_results/bitrate.csv)
[MS-SSIM](https://storage.googleapis.com/compression-ml/residual_gru_results/msssim.csv)
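Once downloaded locally, per-iteration averages can be computed, for example
(a sketch assuming purely numeric comma-separated files saved as bitrate.csv
and msssim.csv):

```python
import numpy as np

# Rows: Kodak images 1-24; columns: model iterations 1-16.
bitrate = np.loadtxt('bitrate.csv', delimiter=',')
msssim = np.loadtxt('msssim.csv', delimiter=',')
print('mean bitrate per iteration:', bitrate.mean(axis=0))
print('mean MS-SSIM per iteration:', msssim.mean(axis=0))
```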
## FAQ
#### How do I train my own compression network?
We currently don't provide the code to build and train a compression
graph from scratch.
#### I get an InvalidArgumentError: Incompatible shapes.
This is usually because our network only supports images whose height and
width are both divisible by 32 pixels. Try padding your images to 32-pixel
boundaries.
## Contact Info
Model repository maintained by Nick Johnston ([nmjohn](https://github.com/nmjohn)).
# Neural net based entropy coding
This is a [TensorFlow](http://www.tensorflow.org/) model for additional
lossless compression of bitstreams generated by neural net based image
encoders as described in
[https://arxiv.org/abs/1703.10114](https://arxiv.org/abs/1703.10114).
To be more specific, the entropy coder aims at further compressing binary
codes which have a 3D tensor structure with:
* the first two dimensions of the tensors corresponding to the height and
the width of the binary codes,
* the last dimension being the depth of the codes. The last dimension can be
sliced into N groups of K, where each additional group is used by the image
decoder to add more details to the reconstructed image.
The code in this directory only contains the underlying code probability model
but does not perform the actual compression using arithmetic coding.
The code probability model is enough to compute the theoretical compression
ratio.
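As an illustration, the code layout can be pictured as follows (a sketch with
made-up sizes; real codes come from the image encoder):

```python
import numpy as np

# Binary codes: height x width x depth, with depth split into N groups of K.
height, width, n_groups, k = 48, 64, 16, 4
codes = np.random.randint(0, 2, size=(height, width, n_groups * k))

# Group g holds the bits the image decoder uses at refinement step g.
groups = np.split(codes, n_groups, axis=-1)
print(len(groups), groups[0].shape)  # 16, (48, 64, 4)
```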
## Prerequisites
The only software requirement for running the encoder and decoder is having
TensorFlow installed.
You will also need to add the top level source directory of the entropy coder
to your `PYTHONPATH`, for example:
`export PYTHONPATH=${PYTHONPATH}:/tmp/models/compression`
## Training the entropy coder
### Synthetic dataset
If you do not have a training dataset, there is a simple code generative model
that you can use to generate a dataset and play with the entropy coder.
The generative model is located under `dataset/gen_synthetic_dataset.py`. Note
that this simple generative model is not going to give good results on real
images, as its statistics are not meant to be close to those of the binary
representation of encoded images. Consider it a toy dataset, no more, no less.
To generate a synthetic dataset with 20000 samples:
`mkdir -p /tmp/dataset`
`python ./dataset/gen_synthetic_dataset.py --dataset_dir=/tmp/dataset/
--count=20000`
Note that the generator has not been optimized at all, so generating the
synthetic dataset is currently pretty slow.
### Training
If you just want to play with the entropy coder trainer, here is the command
line that can be used to train the entropy coder on the synthetic dataset:
`mkdir -p /tmp/entropy_coder_train`
`python ./core/entropy_coder_train.py --task=0
--train_dir=/tmp/entropy_coder_train/
--model=progressive
--model_config=./configs/synthetic/model_config.json
--train_config=./configs/synthetic/train_config.json
--input_config=./configs/synthetic/input_config.json
`
Training is configured using three JSON files (a hypothetical sketch of the
model configuration follows this list):
* One file is used to configure the underlying entropy coder model.
Currently, only the *progressive* model is supported.
This model takes 2 mandatory parameters and an optional one:
* `layer_depth`: the number of bits per layer (a.k.a. iteration).
Background: the image decoder takes each layer to add more detail
to the image.
  * `layer_count`: the maximum number of layers that should be supported
    by the model. This should be equal to or greater than the maximum
    number of layers in the input binary codes.
* `coded_layer_count`: This can be used to consider only partial codes,
keeping only the first `coded_layer_count` layers and ignoring the
remaining layers. If left empty, the binary codes are left unchanged.
* One file to configure the training, including the learning rate, ...
  The meanings of the parameters are pretty straightforward. Note that this
  file is only used during training and is not needed during inference.
* One file to specify the input dataset to use during training.
The dataset is formatted using tf.RecordIO.
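As an illustration only, a hypothetical model configuration for the
*progressive* model could be written out like this (field names follow the
list above; the values are made up, not recommended settings):

```python
import json

model_config = {
    'layer_depth': 4,   # bits per layer (a.k.a. iteration)
    'layer_count': 16,  # must cover the max number of layers in the codes
    # 'coded_layer_count' is optional; omitted so the codes are unchanged.
}
with open('/tmp/model_config.json', 'w') as f:
  json.dump(model_config, f, indent=2)
```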
## Inference: file size after entropy coding
### Using a synthetic sample
Here is the command line to generate a single synthetic sample formatted
in the same way as what is provided by the image encoder:
`python ./dataset/gen_synthetic_single.py
--sample_filename=/tmp/dataset/sample_0000.npz`
To actually compute the additional compression ratio using the entropy coder
trained in the previous step:
`python ./core/entropy_coder_single.py
--model=progressive
--model_config=./configs/synthetic/model_config.json
--input_codes=/tmp/dataset/sample_0000.npz
--checkpoint=/tmp/entropy_coder_train/model.ckpt-209078`
where the checkpoint number should be adjusted accordingly.