Commit 44fa1d37 authored by Alex Lee

Merge remote-tracking branch 'upstream/master'

parents d3628a74 6e367f67
### Pre-Trained Models
We provide the following pre-trained models:
Config Name | Checkpoint | Mean Dist. | 50%ile Dist. | 75%ile Dist. | Success Rate (%) |
:-: | :-: | :-: | :-: | :-: | :-: |
cmp.lmap_Msc.clip5.sbpd_d_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_r2r.tar) | 4.79 | 0 | 1 | 78.9 |
cmp.lmap_Msc.clip5.sbpd_rgb_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_rgb_r2r.tar) | 7.74 | 0 | 14 | 62.4 |
cmp.lmap_Msc.clip5.sbpd_d_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_ST.tar) | 10.67 | 9 | 19 | 39.7 |
cmp.lmap_Msc.clip5.sbpd_rgb_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_rgb_ST.tar) | 11.27 | 10 | 19 | 35.6 |
cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80 | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80.tar) | 11.6 | 0 | 19 | 66.9 |
bl.v2.noclip.sbpd_d_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_r2r.tar) | 5.90 | 0 | 6 | 71.2 |
bl.v2.noclip.sbpd_rgb_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_rgb_r2r.tar) | 10.21 | 1 | 21 | 53.4 |
bl.v2.noclip.sbpd_d_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_ST.tar) | 13.29 | 14 | 23 | 28.0 |
bl.v2.noclip.sbpd_rgb_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_rgb_ST.tar) | 13.37 | 13 | 20 | 24.2 |
bl.v2.noclip.sbpd_d_r2r_h0_64_80 | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_r2r_h0_64_80.tar) | 15.30 | 0 | 29 | 57.9 |
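
Each checkpoint is a plain tar archive at the linked URL. As a minimal example, any config name from the table can be fetched and unpacked like this:

```bash
# Download and extract one pre-trained checkpoint into the current directory.
CONFIG="cmp.lmap_Msc.clip5.sbpd_d_r2r"
wget "http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/${CONFIG}.tar"
tar -xf "${CONFIG}.tar"
```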
10c10
< from OpenGL import platform, constant, arrays
---
> from OpenGL import platform, constant, arrays, contextdata
249a250
> from OpenGL._bytes import _NULL_8_BYTE
399c400
< array = ArrayDatatype.asArray( pointer, type )
---
> array = arrays.ArrayDatatype.asArray( pointer, type )
405c406
< ArrayDatatype.voidDataPointer( array )
---
> arrays.ArrayDatatype.voidDataPointer( array )
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
echo $VIRTUAL_ENV
patch $VIRTUAL_ENV/local/lib/python2.7/site-packages/OpenGL/GLES2/VERSION/GLES2_2_0.py patches/GLES2_2_0.py.patch
patch $VIRTUAL_ENV/local/lib/python2.7/site-packages/OpenGL/platform/ctypesloader.py patches/ctypesloader.py.patch
45c45,46
< return dllType( name, mode )
---
> print './' + name
> return dllType( './' + name, mode )
47,48c48,53
< err.args += (name,fullName)
< raise
---
> try:
> print name
> return dllType( name, mode )
> except:
> err.args += (name,fullName)
> raise
// This shader computes per-pixel depth (-z coordinate in the camera space, or
// orthogonal distance to the camera plane). The result is multiplied by the
// `kFixedPointFraction` constant and is encoded to RGB channels as an integer
// (R being the least significant byte).
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#endif
const float kFixedPointFraction = 1000.0;
varying float vDepth;
void main(void) {
float d = vDepth;
// Encode the depth to RGB.
d *= (kFixedPointFraction / 255.0);
gl_FragColor.r = mod(d, 1.0);
d = (d - gl_FragColor.r) / 255.0;
gl_FragColor.g = mod(d, 1.0);
d = (d - gl_FragColor.g) / 255.0;
gl_FragColor.b = mod(d, 1.0);
gl_FragColor.a = 1.0;
}
uniform mat4 uViewMatrix;
uniform mat4 uProjectionMatrix;
attribute vec3 aPosition;
varying float vDepth;
void main(void) {
vec4 worldPosition = vec4(aPosition, 1.0);
vec4 viewPosition = uViewMatrix * worldPosition;
gl_Position = uProjectionMatrix * viewPosition;
// Orthogonal depth is simply -z in the camera space.
vDepth = -viewPosition.z;
}
precision highp float;
varying vec4 vColor;
varying vec2 vTextureCoord;
uniform sampler2D uTexture;
void main(void) {
vec4 color = vColor;
color = texture2D(uTexture, vTextureCoord);
gl_FragColor = color;
}
uniform mat4 uViewMatrix;
uniform mat4 uProjectionMatrix;
uniform vec4 uColor;
attribute vec4 aColor;
attribute vec3 aPosition;
attribute vec2 aTextureCoord;
varying vec4 vColor;
varying vec2 vTextureCoord;
void main(void) {
vec4 worldPosition = vec4(aPosition, 1.0);
gl_Position = uProjectionMatrix * (uViewMatrix * worldPosition);
vColor = aColor * uColor;
vTextureCoord = aTextureCoord;
}
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Implements loading and rendering of meshes. Contains 2 classes:
Shape: Class that exposes high level functions for loading and manipulating
shapes. This currently is bound to assimp
(https://github.com/assimp/assimp). If you want to interface to a different
library, reimplement this class with bindings to your mesh loading library.
SwiftshaderRenderer: Class that renders Shapes. Currently this uses python
bindings to OpenGL (EGL), bindings to an alternate renderer may be implemented
here.
"""
import ctypes, logging, os
import numpy as np
import cv2
import pyassimp as assimp
from OpenGL.GLES2 import *
from OpenGL.EGL import *
import src.rotation_utils as ru
__version__ = 'swiftshader_renderer'
def get_shaders(modalities):
rgb_shader = 'rgb_flat_color' if 'rgb' in modalities else None
d_shader = 'depth_rgb_encoded' if 'depth' in modalities else None
return rgb_shader, d_shader
def sample_points_on_faces(vs, fs, rng, n_samples_per_face):
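# Draw n_samples_per_face uniform samples from each triangle using the
# standard sqrt trick: for r1, r2 ~ U[0, 1],
#   p = (1 - sqrt(r1))*v1 + sqrt(r1)*(1 - r2)*v2 + sqrt(r1)*r2*v3
# is uniformly distributed on the triangle (v1, v2, v3). Returns the sampled
# points, the area of each face (0.5 * ||(v1 - v3) x (v2 - v3)||), and the
# face index each sample came from.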
idx = np.repeat(np.arange(fs.shape[0]), n_samples_per_face)
r = rng.rand(idx.size, 2)
r1 = r[:,:1]; r2 = r[:,1:]; sqrt_r1 = np.sqrt(r1);
v1 = vs[fs[idx, 0], :]; v2 = vs[fs[idx, 1], :]; v3 = vs[fs[idx, 2], :];
pts = (1-sqrt_r1)*v1 + sqrt_r1*(1-r2)*v2 + sqrt_r1*r2*v3
v1 = vs[fs[:,0], :]; v2 = vs[fs[:, 1], :]; v3 = vs[fs[:, 2], :];
ar = 0.5*np.sqrt(np.sum(np.cross(v1-v3, v2-v3)**2, 1))
return pts, ar, idx
class Shape():
def get_pyassimp_load_options(self):
load_flags = assimp.postprocess.aiProcess_Triangulate;
load_flags = load_flags | assimp.postprocess.aiProcess_SortByPType;
load_flags = load_flags | assimp.postprocess.aiProcess_OptimizeMeshes;
load_flags = load_flags | assimp.postprocess.aiProcess_RemoveRedundantMaterials;
load_flags = load_flags | assimp.postprocess.aiProcess_FindDegenerates;
load_flags = load_flags | assimp.postprocess.aiProcess_GenSmoothNormals;
load_flags = load_flags | assimp.postprocess.aiProcess_JoinIdenticalVertices;
load_flags = load_flags | assimp.postprocess.aiProcess_ImproveCacheLocality;
load_flags = load_flags | assimp.postprocess.aiProcess_GenUVCoords;
load_flags = load_flags | assimp.postprocess.aiProcess_FindInvalidData;
return load_flags
def __init__(self, obj_file, material_file=None, load_materials=True,
name_prefix='', name_suffix=''):
if material_file is not None:
logging.error('Ignoring material file input, reading them off obj file.')
load_flags = self.get_pyassimp_load_options()
scene = assimp.load(obj_file, processing=load_flags)
filter_ind = self._filter_triangles(scene.meshes)
self.meshes = [scene.meshes[i] for i in filter_ind]
for m in self.meshes:
m.name = name_prefix + m.name + name_suffix
dir_name = os.path.dirname(obj_file)
# Load materials
materials = None
if load_materials:
materials = []
for m in self.meshes:
file_name = os.path.join(dir_name, m.material.properties[('file', 1)])
assert(os.path.exists(file_name)), \
'Texture file {:s} does not exist.'.format(file_name)
img_rgb = cv2.imread(file_name)[::-1,:,::-1]
if img_rgb.shape[0] != img_rgb.shape[1]:
logging.warn('Texture image not square.')
sz = np.maximum(img_rgb.shape[0], img_rgb.shape[1])
sz = int(np.power(2., np.ceil(np.log2(sz))))
img_rgb = cv2.resize(img_rgb, (sz,sz), interpolation=cv2.INTER_LINEAR)
else:
sz = img_rgb.shape[0]
sz_ = int(np.power(2., np.ceil(np.log2(sz))))
if sz != sz_:
logging.warn('Texture image is square but not of power-of-2 size. ' +
'Changing size from %d to %d.', sz, sz_)
sz = sz_
img_rgb = cv2.resize(img_rgb, (sz,sz), interpolation=cv2.INTER_LINEAR)
materials.append(img_rgb)
self.scene = scene
self.materials = materials
def _filter_triangles(self, meshes):
select = []
for i in range(len(meshes)):
if meshes[i].primitivetypes == 4:
select.append(i)
return select
def flip_shape(self):
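# Mirror the shape about the xz-plane (negate y). The vertex order of each
# face is swapped (columns 1 and 2) so that triangle winding, and hence
# front-facing orientation, stays consistent after the reflection.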
for m in self.meshes:
m.vertices[:,1] = -m.vertices[:,1]
bb = m.faces*1
bb[:,1] = m.faces[:,2]
bb[:,2] = m.faces[:,1]
m.faces = bb
# m.vertices[:,[0,1]] = m.vertices[:,[1,0]]
def get_vertices(self):
vs = []
for m in self.meshes:
vs.append(m.vertices)
vss = np.concatenate(vs, axis=0)
return vss, vs
def get_faces(self):
vs = []
for m in self.meshes:
v = m.faces
vs.append(v)
return vs
def get_number_of_meshes(self):
return len(self.meshes)
def scale(self, sx=1., sy=1., sz=1.):
pass
def sample_points_on_face_of_shape(self, i, n_samples_per_face, sc):
v = self.meshes[i].vertices*sc
f = self.meshes[i].faces
p, face_areas, face_idx = sample_points_on_faces(
v, f, np.random.RandomState(0), n_samples_per_face)
return p, face_areas, face_idx
def __del__(self):
scene = self.scene
assimp.release(scene)
class SwiftshaderRenderer():
def __init__(self):
self.entities = {}
def init_display(self, width, height, fov, z_near, z_far, rgb_shader,
d_shader):
self.init_renderer_egl(width, height)
dir_path = os.path.dirname(os.path.realpath(__file__))
if d_shader is not None and rgb_shader is not None:
logging.fatal('Does not support setting both rgb_shader and d_shader.')
if d_shader is not None:
assert rgb_shader is None
shader = d_shader
self.modality = 'depth'
if rgb_shader is not None:
assert d_shader is None
shader = rgb_shader
self.modality = 'rgb'
self.create_shaders(os.path.join(dir_path, shader+'.vp'),
os.path.join(dir_path, shader + '.fp'))
aspect = width*1./(height*1.)
self.set_camera(fov, z_near, z_far, aspect)
def init_renderer_egl(self, width, height):
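# Set up an off-screen GLES2 context via EGL: get the default display,
# choose an RGB8 / 16-bit-depth pbuffer-capable config, create a GLES2
# context and a width x height pbuffer surface, and make them current so
# all subsequent GL calls render into the off-screen buffer.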
major,minor = ctypes.c_long(),ctypes.c_long()
logging.info('init_renderer_egl: EGL_DEFAULT_DISPLAY: %s', EGL_DEFAULT_DISPLAY)
egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY)
logging.info('init_renderer_egl: egl_display: %s', egl_display)
eglInitialize(egl_display, major, minor)
logging.info('init_renderer_egl: EGL_OPENGL_API, EGL_OPENGL_ES_API: %s, %s',
EGL_OPENGL_API, EGL_OPENGL_ES_API)
eglBindAPI(EGL_OPENGL_ES_API)
num_configs = ctypes.c_long()
configs = (EGLConfig*1)()
local_attributes = [EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, EGL_BLUE_SIZE, 8,
EGL_DEPTH_SIZE, 16, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, EGL_NONE,]
logging.error('init_renderer_egl: local attributes: %s', local_attributes)
local_attributes = arrays.GLintArray.asArray(local_attributes)
success = eglChooseConfig(egl_display, local_attributes, configs, 1, num_configs)
logging.error('init_renderer_egl: eglChooseConfig success, num_configs: %d, %d', success, num_configs.value)
egl_config = configs[0]
context_attributes = [EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE]
context_attributes = arrays.GLintArray.asArray(context_attributes)
egl_context = eglCreateContext(egl_display, egl_config, EGL_NO_CONTEXT, context_attributes)
buffer_attributes = [EGL_WIDTH, width, EGL_HEIGHT, height, EGL_NONE]
buffer_attributes = arrays.GLintArray.asArray(buffer_attributes)
egl_surface = eglCreatePbufferSurface(egl_display, egl_config, buffer_attributes)
eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context)
logging.error("init_renderer_egl: egl_display: %s egl_surface: %s, egl_config: %s", egl_display, egl_surface, egl_context)
glViewport(0, 0, width, height);
self.egl_display = egl_display
self.egl_surface = egl_surface
self.egl_config = egl_config
self.egl_mapping = {}
self.render_timer = None
self.load_timer = None
self.height = height
self.width = width
def create_shaders(self, v_shader_file, f_shader_file):
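# Compile the vertex and fragment shaders from file, link them into a
# program, and pin attribute locations 0/1/2 to aPosition/aColor/
# aTextureCoord; these are the slots used by glVertexAttribPointer in
# load_default_object and _actual_render.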
v_shader = glCreateShader(GL_VERTEX_SHADER)
with open(v_shader_file, 'r') as f:
ls = ''
for l in f:
ls = ls + l
glShaderSource(v_shader, ls)
glCompileShader(v_shader);
assert(glGetShaderiv(v_shader, GL_COMPILE_STATUS) == 1)
f_shader = glCreateShader(GL_FRAGMENT_SHADER)
with open(f_shader_file, 'r') as f:
ls = ''
for l in f:
ls = ls + l
glShaderSource(f_shader, ls)
glCompileShader(f_shader);
assert(glGetShaderiv(f_shader, GL_COMPILE_STATUS) == 1)
egl_program = glCreateProgram();
assert(egl_program)
glAttachShader(egl_program, v_shader)
glAttachShader(egl_program, f_shader)
# Attribute locations must be bound before linking for them to take effect.
glBindAttribLocation(egl_program, 0, "aPosition")
glBindAttribLocation(egl_program, 1, "aColor")
glBindAttribLocation(egl_program, 2, "aTextureCoord")
glLinkProgram(egl_program);
assert(glGetProgramiv(egl_program, GL_LINK_STATUS) == 1)
glUseProgram(egl_program)
self.egl_program = egl_program
self.egl_mapping['vertexs'] = 0
self.egl_mapping['vertexs_color'] = 1
self.egl_mapping['vertexs_tc'] = 2
glClearColor(0.0, 0.0, 0.0, 1.0);
# glEnable(GL_CULL_FACE); glCullFace(GL_BACK);
glEnable(GL_DEPTH_TEST);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
def set_camera(self, fov_vertical, z_near, z_far, aspect):
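# Build a standard OpenGL perspective projection from the vertical field of
# view and the near/far clip planes (width/height below are the dimensions
# of the near plane). The matrix is assembled row-major in numpy, so it is
# transposed before being flattened and uploaded; GLES2 requires the
# transpose argument of glUniformMatrix4fv to be GL_FALSE.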
width = 2*np.tan(np.deg2rad(fov_vertical)/2.0)*z_near*aspect;
height = 2*np.tan(np.deg2rad(fov_vertical)/2.0)*z_near;
egl_program = self.egl_program
c = np.eye(4, dtype=np.float32)
c[3,3] = 0
c[3,2] = -1
c[2,2] = -(z_near+z_far)/(z_far-z_near)
c[2,3] = -2.0*(z_near*z_far)/(z_far-z_near)
c[0,0] = 2.0*z_near/width
c[1,1] = 2.0*z_near/height
c = c.T
projection_matrix_o = glGetUniformLocation(egl_program, 'uProjectionMatrix')
projection_matrix = np.eye(4, dtype=np.float32)
projection_matrix[...] = c
projection_matrix = np.reshape(projection_matrix, (-1))
glUniformMatrix4fv(projection_matrix_o, 1, GL_FALSE, projection_matrix)
def load_default_object(self):
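# Debug helper: uploads two offset triangles with interleaved
# position (3 floats) + RGBA color (4 floats) vertices; the 28-byte stride
# below is 7 floats * 4 bytes.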
v = np.array([[0.0, 0.5, 0.0, 1.0, 1.0, 0.0, 1.0],
[-0.5, -0.5, 0.0, 1.0, 0.0, 1.0, 1.0],
[0.5, -0.5, 0.0, 1.0, 1.0, 1.0, 1.0]], dtype=np.float32)
v = np.concatenate((v,v+0.1), axis=0)
v = np.ascontiguousarray(v, dtype=np.float32)
vbo = glGenBuffers(1)
glBindBuffer (GL_ARRAY_BUFFER, vbo)
glBufferData (GL_ARRAY_BUFFER, v.dtype.itemsize*v.size, v, GL_STATIC_DRAW)
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 28, ctypes.c_void_p(0))
glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 28, ctypes.c_void_p(12))
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
self.num_to_render = 6;
def _actual_render(self):
for entity_id, entity in self.entities.iteritems():
if entity['visible']:
vbo = entity['vbo']
tbo = entity['tbo']
num = entity['num']
glBindBuffer(GL_ARRAY_BUFFER, vbo)
glVertexAttribPointer(self.egl_mapping['vertexs'], 3, GL_FLOAT, GL_FALSE,
20, ctypes.c_void_p(0))
glVertexAttribPointer(self.egl_mapping['vertexs_tc'], 2, GL_FLOAT,
GL_FALSE, 20, ctypes.c_void_p(12))
glEnableVertexAttribArray(self.egl_mapping['vertexs']);
glEnableVertexAttribArray(self.egl_mapping['vertexs_tc']);
glBindTexture(GL_TEXTURE_2D, tbo)
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glDrawArrays(GL_TRIANGLES, 0, num)
def render(self, take_screenshot=False, output_type=0):
# self.render_timer.tic()
self._actual_render()
# self.render_timer.toc(log_at=1000, log_str='render timer', type='time')
np_rgb_img = None
np_d_img = None
c = 1000.
if take_screenshot:
if self.modality == 'rgb':
screenshot_rgba = np.zeros((self.height, self.width, 4), dtype=np.uint8)
glReadPixels(0, 0, self.width, self.height, GL_RGBA, GL_UNSIGNED_BYTE, screenshot_rgba)
np_rgb_img = screenshot_rgba[::-1,:,:3];
if self.modality == 'depth':
screenshot_d = np.zeros((self.height, self.width, 4), dtype=np.uint8)
glReadPixels(0, 0, self.width, self.height, GL_RGBA, GL_UNSIGNED_BYTE, screenshot_d)
np_d_img = screenshot_d[::-1,:,:3];
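# Invert the fixed-point RGB encoding written by the depth shader: the
# shader stores depth * kFixedPointFraction with R as the least significant
# byte, so depth = (R + 255*G + 255^2*B) / c with c = kFixedPointFraction.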
np_d_img = np_d_img[:,:,2]*(255.*255./c) + np_d_img[:,:,1]*(255./c) + np_d_img[:,:,0]*(1./c)
np_d_img = np_d_img.astype(np.float32)
np_d_img[np_d_img == 0] = np.NaN
np_d_img = np_d_img[:,:,np.newaxis]
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
return np_rgb_img, np_d_img
def _load_mesh_into_gl(self, mesh, material):
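# Interleave per-vertex position (3 floats) and texture coordinates
# (2 floats), expand them via the face index list so glDrawArrays can be
# used without an index buffer, upload the result to a VBO, and create a
# texture object from the material image. The 5-float layout matches the
# 20-byte stride used in _actual_render.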
vvt = np.concatenate((mesh.vertices, mesh.texturecoords[0,:,:2]), axis=1)
vvt = np.ascontiguousarray(vvt[mesh.faces.reshape((-1)),:], dtype=np.float32)
num = vvt.shape[0]
vvt = np.reshape(vvt, (-1))
vbo = glGenBuffers(1)
glBindBuffer(GL_ARRAY_BUFFER, vbo)
glBufferData(GL_ARRAY_BUFFER, vvt.dtype.itemsize*vvt.size, vvt, GL_STATIC_DRAW)
tbo = glGenTextures(1)
glBindTexture(GL_TEXTURE_2D, tbo)
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, material.shape[1],
material.shape[0], 0, GL_RGB, GL_UNSIGNED_BYTE,
np.reshape(material, (-1)))
return num, vbo, tbo
def load_shapes(self, shapes):
entities = self.entities
entity_ids = []
for i, shape in enumerate(shapes):
for j in range(len(shape.meshes)):
name = shape.meshes[j].name
assert name not in entities, '{:s} entity already exists.'.format(name)
num, vbo, tbo = self._load_mesh_into_gl(shape.meshes[j], shape.materials[j])
entities[name] = {'num': num, 'vbo': vbo, 'tbo': tbo, 'visible': False}
entity_ids.append(name)
return entity_ids
def set_entity_visible(self, entity_ids, visibility):
for entity_id in entity_ids:
self.entities[entity_id]['visible'] = visibility
def position_camera(self, camera_xyz, lookat_xyz, up):
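# Construct the view matrix: rotate the canonical camera frame (looking
# along +y with +z up) to look from camera_xyz toward lookat_xyz, translate
# by -camera_xyz, then swap/flip the y and z axes to move from the world's
# z-up convention to the OpenGL camera convention, and upload the transposed
# result as uViewMatrix.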
camera_xyz = np.array(camera_xyz)
lookat_xyz = np.array(lookat_xyz)
up = np.array(up)
lookat_to = lookat_xyz - camera_xyz
lookat_from = np.array([0, 1., 0.])
up_from = np.array([0, 0., 1.])
up_to = up * 1.
# np.set_printoptions(precision=2, suppress=True)
# print up_from, lookat_from, up_to, lookat_to
r = ru.rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to)
R = np.eye(4, dtype=np.float32)
R[:3,:3] = r
t = np.eye(4, dtype=np.float32)
t[:3,3] = -camera_xyz
view_matrix = np.dot(R.T, t)
flip_yz = np.eye(4, dtype=np.float32)
flip_yz[1,1] = 0; flip_yz[2,2] = 0; flip_yz[1,2] = 1; flip_yz[2,1] = -1;
view_matrix = np.dot(flip_yz, view_matrix)
view_matrix = view_matrix.T
# print np.concatenate((R, t, view_matrix), axis=1)
view_matrix = np.reshape(view_matrix, (-1))
view_matrix_o = glGetUniformLocation(self.egl_program, 'uViewMatrix')
glUniformMatrix4fv(view_matrix_o, 1, GL_FALSE, view_matrix)
return None, None #camera_xyz, q
def clear_scene(self):
keys = self.entities.keys()
for entity_id in keys:
entity = self.entities.pop(entity_id, None)
vbo = entity['vbo']
tbo = entity['tbo']
num = entity['num']
glDeleteBuffers(1, [vbo])
glDeleteTextures(1, [tbo])
def __del__(self):
self.clear_scene()
eglMakeCurrent(self.egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT)
eglDestroySurface(self.egl_display, self.egl_surface)
eglTerminate(self.egl_display)
numpy
pillow
PyOpenGL
PyOpenGL-accelerate
six
networkx
scikit-image
scipy
opencv-python
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r""" Script to setup the grid moving agent.
blaze build --define=ION_GFX_OGLES20=1 -c opt --copt=-mavx --config=cuda_clang \
learning/brain/public/tensorflow_std_server{,_gpu} \
experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill.par \
experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill
./blaze-bin/experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill \
--logdir=/cns/iq-d/home/saurabhgupta/output/stanford-distill/local/v0/ \
--config_name 'v0+train' --gfs_user robot-intelligence-gpu
"""
import sys, os, numpy as np
import copy
import argparse, pprint
import time
import cProfile
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.python.framework import ops
from tensorflow.contrib.framework.python.ops import variables
import logging
from tensorflow.python.platform import gfile
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
from cfgs import config_distill
from tfcode import tf_utils
import src.utils as utils
import src.file_utils as fu
import tfcode.distillation as distill
import datasets.nav_env as nav_env
FLAGS = flags.FLAGS
flags.DEFINE_string('master', 'local',
'The name of the TensorFlow master to use.')
flags.DEFINE_integer('ps_tasks', 0, 'The number of parameter servers. If the '
'value is 0, then the parameters are handled locally by '
'the worker.')
flags.DEFINE_integer('task', 0, 'The Task ID. This value is used when training '
'with multiple workers to identify each worker.')
flags.DEFINE_integer('num_workers', 1, '')
flags.DEFINE_string('config_name', '', '')
flags.DEFINE_string('logdir', '', '')
def main(_):
args = config_distill.get_args_for_config(FLAGS.config_name)
args.logdir = FLAGS.logdir
args.solver.num_workers = FLAGS.num_workers
args.solver.task = FLAGS.task
args.solver.ps_tasks = FLAGS.ps_tasks
args.solver.master = FLAGS.master
args.buildinger.env_class = nav_env.MeshMapper
fu.makedirs(args.logdir)
args.buildinger.logdir = args.logdir
R = nav_env.get_multiplexor_class(args.buildinger, args.solver.task)
if False:
pr = cProfile.Profile()
pr.enable()
rng = np.random.RandomState(0)
for i in range(1):
b, instances_perturbs = R.sample_building(rng)
inputs = b.worker(*(instances_perturbs))
for j in range(inputs['imgs'].shape[0]):
p = os.path.join('tmp', '{:d}.png'.format(j))
img = inputs['imgs'][j,0,:,:,:3]*1
img = (img).astype(np.uint8)
fu.write_image(p, img)
print(inputs['imgs'].shape)
inputs = R.pre(inputs)
pr.disable()
pr.print_stats(2)
if args.control.train:
if not gfile.Exists(args.logdir):
gfile.MakeDirs(args.logdir)
m = utils.Foo()
m.tf_graph = tf.Graph()
config = tf.ConfigProto()
config.device_count['GPU'] = 1
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.8
with m.tf_graph.as_default():
with tf.device(tf.train.replica_device_setter(args.solver.ps_tasks)):
m = distill.setup_to_run(m, args, is_training=True,
batch_norm_is_training=True)
train_step_kwargs = distill.setup_train_step_kwargs_mesh(
m, R, os.path.join(args.logdir, 'train'),
rng_seed=args.solver.task, is_chief=args.solver.task==0, iters=1,
train_display_interval=args.summary.display_interval)
final_loss = slim.learning.train(
train_op=m.train_op,
logdir=args.logdir,
master=args.solver.master,
is_chief=args.solver.task == 0,
number_of_steps=args.solver.max_steps,
train_step_fn=tf_utils.train_step_custom,
train_step_kwargs=train_step_kwargs,
global_step=m.global_step_op,
init_op=m.init_op,
init_fn=m.init_fn,
sync_optimizer=m.sync_optimizer,
saver=m.saver_op,
summary_op=None, session_config=config)
if args.control.test:
m = utils.Foo()
m.tf_graph = tf.Graph()
checkpoint_dir = os.path.join(format(args.logdir))
with m.tf_graph.as_default():
m = distill.setup_to_run(m, args, is_training=False,
batch_norm_is_training=args.control.force_batchnorm_is_training_at_test)
train_step_kwargs = distill.setup_train_step_kwargs_mesh(
m, R, os.path.join(args.logdir, args.control.test_name),
rng_seed=args.solver.task+1, is_chief=args.solver.task==0,
iters=args.summary.test_iters, train_display_interval=None)
sv = slim.learning.supervisor.Supervisor(
graph=ops.get_default_graph(), logdir=None, init_op=m.init_op,
summary_op=None, summary_writer=None, global_step=None, saver=m.saver_op)
last_checkpoint = None
while True:
last_checkpoint = slim.evaluation.wait_for_new_checkpoint(checkpoint_dir, last_checkpoint)
checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1])
start = time.time()
logging.info('Starting evaluation at %s using checkpoint %s.',
time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
last_checkpoint)
config = tf.ConfigProto()
config.device_count['GPU'] = 1
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.8
with sv.managed_session(args.solver.master,config=config,
start_standard_services=False) as sess:
sess.run(m.init_op)
sv.saver.restore(sess, last_checkpoint)
sv.start_queue_runners(sess)
vals, _ = tf_utils.train_step_custom(
sess, None, m.global_step_op, train_step_kwargs, mode='val')
if checkpoint_iter >= args.solver.max_steps:
break
if __name__ == '__main__':
app.run()
# Script to download models to initialize the RGB and D models for training. We
# use ResNet-v2-50 for both modalities.
mkdir -p data/init_models
cd data/init_models
# RGB Models are initialized by pre-training on ImageNet.
mkdir -p resnet_v2_50
RGB_URL="http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz"
wget $RGB_URL
tar -xf resnet_v2_50_2017_04_14.tar.gz -C resnet_v2_50
# Depth models are initialized by distilling the RGB model to D images using
# Cross-Modal Distillation (https://arxiv.org/abs/1507.00448).
mkdir -p distill_rgb_to_d_resnet_v2_50
D_URL="http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/distill_rgb_to_d_resnet_v2_50.tar"
wget $D_URL
tar -xf distill_rgb_to_d_resnet_v2_50.tar -C distill_rgb_to_d_resnet_v2_50
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A simple python function to walk in the enviornments that we have created.
PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_env_vis.py \
--dataset_name sbpd --building_name area3
"""
import sys
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
from PIL import ImageTk, Image
import Tkinter as tk
import logging
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
import datasets.nav_env_config as nec
import datasets.nav_env as nav_env
import cv2
from datasets import factory
import render.swiftshader_renderer as renderer
SwiftshaderRenderer = renderer.SwiftshaderRenderer
VisualNavigationEnv = nav_env.VisualNavigationEnv
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset_name', 'sbpd', 'Name of the dataset.')
flags.DEFINE_float('fov', 60., 'Field of view')
flags.DEFINE_integer('image_size', 512, 'Size of the image.')
flags.DEFINE_string('building_name', '', 'Name of the building.')
def get_args():
navtask = nec.nav_env_base_config()
navtask.task_params.type = 'rng_rejection_sampling_many'
navtask.task_params.rejection_sampling_M = 2000
navtask.task_params.min_dist = 10
sz = FLAGS.image_size
navtask.camera_param.fov = FLAGS.fov
navtask.camera_param.height = sz
navtask.camera_param.width = sz
navtask.task_params.img_height = sz
navtask.task_params.img_width = sz
# navtask.task_params.semantic_task.class_map_names = ['chair', 'door', 'table']
# navtask.task_params.type = 'to_nearest_obj_acc'
logging.info('navtask: %s', navtask)
return navtask
def load_building(dataset_name, building_name):
dataset = factory.get_dataset(dataset_name)
navtask = get_args()
cp = navtask.camera_param
rgb_shader, d_shader = renderer.get_shaders(cp.modalities)
r_obj = SwiftshaderRenderer()
r_obj.init_display(width=cp.width, height=cp.height,
fov=cp.fov, z_near=cp.z_near, z_far=cp.z_far,
rgb_shader=rgb_shader, d_shader=d_shader)
r_obj.clear_scene()
b = VisualNavigationEnv(robot=navtask.robot, env=navtask.env,
task_params=navtask.task_params,
building_name=building_name, flip=False,
logdir=None, building_loader=dataset,
r_obj=r_obj)
b.load_building_into_scene()
b.set_building_visibility(False)
return b
def walk_through(b):
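# Interactive Tk viewer: renders the first-person view at the current graph
# node and binds the arrow keys to actions (<Left> -> action 2, <Up> ->
# action 3, <Right> -> action 1, via b.take_action); 'q' quits. A
# colormapped overhead view of the traversible map is shown alongside.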
# init agent at a random location in the environment.
init_env_state = b.reset([np.random.RandomState(0), np.random.RandomState(0)])
global current_node
rng = np.random.RandomState(0)
current_node = rng.choice(b.task.nodes.shape[0])
root = tk.Tk()
image = b.render_nodes(b.task.nodes[[current_node],:])[0]
print image.shape
image = image.astype(np.uint8)
im = Image.fromarray(image)
im = ImageTk.PhotoImage(im)
panel = tk.Label(root, image=im)
map_size = b.traversible.shape
sc = np.max(map_size)/256.
loc = np.array([[map_size[1]/2., map_size[0]/2.]])
x_axis = np.zeros_like(loc); x_axis[:,1] = sc
y_axis = np.zeros_like(loc); y_axis[:,0] = -sc
cum_fs, cum_valid = nav_env.get_map_to_predict(loc, x_axis, y_axis,
map=b.traversible*1.,
map_size=256)
cum_fs = cum_fs[0]
cum_fs = cv2.applyColorMap((cum_fs*255).astype(np.uint8), cv2.COLORMAP_JET)
im = Image.fromarray(cum_fs)
im = ImageTk.PhotoImage(im)
panel_overhead = tk.Label(root, image=im)
def refresh():
global current_node
image = b.render_nodes(b.task.nodes[[current_node],:])[0]
image = image.astype(np.uint8)
im = Image.fromarray(image)
im = ImageTk.PhotoImage(im)
panel.configure(image=im)
panel.image = im
def left_key(event):
global current_node
current_node = b.take_action([current_node], [2], 1)[0][0]
refresh()
def up_key(event):
global current_node
current_node = b.take_action([current_node], [3], 1)[0][0]
refresh()
def right_key(event):
global current_node
current_node = b.take_action([current_node], [1], 1)[0][0]
refresh()
def quit(event):
root.destroy()
panel_overhead.grid(row=4, column=5, rowspan=1, columnspan=1,
sticky=tk.W+tk.E+tk.N+tk.S)
panel.bind('<Left>', left_key)
panel.bind('<Up>', up_key)
panel.bind('<Right>', right_key)
panel.bind('q', quit)
panel.focus_set()
panel.grid(row=0, column=0, rowspan=5, columnspan=5,
sticky=tk.W+tk.E+tk.N+tk.S)
root.mainloop()
def simple_window():
root = tk.Tk()
image = np.zeros((128, 128, 3), dtype=np.uint8)
image[32:96, 32:96, 0] = 255
im = Image.fromarray(image)
im = ImageTk.PhotoImage(im)
image = np.zeros((128, 128, 3), dtype=np.uint8)
image[32:96, 32:96, 1] = 255
im2 = Image.fromarray(image)
im2 = ImageTk.PhotoImage(im2)
panel = tk.Label(root, image=im)
def left_key(event):
panel.configure(image=im2)
panel.image = im2
def quit(event):
sys.exit()
panel.bind('<Left>', left_key)
panel.bind('<Up>', left_key)
panel.bind('<Down>', left_key)
panel.bind('q', quit)
panel.focus_set()
panel.pack(side = "bottom", fill = "both", expand = "yes")
root.mainloop()
def main(_):
b = load_building(FLAGS.dataset_name, FLAGS.building_name)
walk_through(b)
if __name__ == '__main__':
app.run()
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r""" Script to train and test the grid navigation agent.
Usage:
1. Testing a model.
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \
PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \
--config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+bench_test \
--logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r
2. Training a model (locally).
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \
PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \
--config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+train_train \
--logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_
3. Training a model (distributed).
# See https://www.tensorflow.org/deploy/distributed on how to setup distributed
# training.
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \
PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \
--config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+train_train \
--logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_ \
--ps_tasks $num_ps --master $master_name --task $worker_id
"""
import sys, os, numpy as np
import copy
import argparse, pprint
import time
import cProfile
import platform
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.python.framework import ops
from tensorflow.contrib.framework.python.ops import variables
import logging
from tensorflow.python.platform import gfile
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
from cfgs import config_cmp
from cfgs import config_vision_baseline
import datasets.nav_env as nav_env
import src.file_utils as fu
import src.utils as utils
import tfcode.cmp as cmp
from tfcode import tf_utils
from tfcode import vision_baseline_lstm
FLAGS = flags.FLAGS
flags.DEFINE_string('master', '',
'The address of the tensorflow master')
flags.DEFINE_integer('ps_tasks', 0, 'The number of parameter servers. If the '
'value is 0, then the parameters are handled locally by '
'the worker.')
flags.DEFINE_integer('task', 0, 'The Task ID. This value is used when training '
'with multiple workers to identify each worker.')
flags.DEFINE_integer('num_workers', 1, '')
flags.DEFINE_string('config_name', '', '')
flags.DEFINE_string('logdir', '', '')
flags.DEFINE_integer('solver_seed', 0, '')
flags.DEFINE_integer('delay_start_iters', 20, '')
logging.basicConfig(level=logging.INFO)
def main(_):
_launcher(FLAGS.config_name, FLAGS.logdir)
def _launcher(config_name, logdir):
args = _setup_args(config_name, logdir)
fu.makedirs(args.logdir)
if args.control.train:
_train(args)
if args.control.test:
_test(args)
def get_args_for_config(config_name):
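# Config names take the form '<type>.<rest>', e.g.
# 'cmp.lmap_Msc.clip5.sbpd_d_r2r': the leading component selects the CMP
# model ('cmp') or the LSTM vision baseline ('bl'), matching the Config Name
# column of the pre-trained models table.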
configs = config_name.split('.')
type = configs[0]
config_name = '.'.join(configs[1:])
if type == 'cmp':
args = config_cmp.get_args_for_config(config_name)
args.setup_to_run = cmp.setup_to_run
args.setup_train_step_kwargs = cmp.setup_train_step_kwargs
elif type == 'bl':
args = config_vision_baseline.get_args_for_config(config_name)
args.setup_to_run = vision_baseline_lstm.setup_to_run
args.setup_train_step_kwargs = vision_baseline_lstm.setup_train_step_kwargs
else:
logging.fatal('Unknown type: {:s}'.format(type))
return args
def _setup_args(config_name, logdir):
args = get_args_for_config(config_name)
args.solver.num_workers = FLAGS.num_workers
args.solver.task = FLAGS.task
args.solver.ps_tasks = FLAGS.ps_tasks
args.solver.master = FLAGS.master
args.solver.seed = FLAGS.solver_seed
args.logdir = logdir
args.navtask.logdir = None
return args
def _train(args):
container_name = ""
R = lambda: nav_env.get_multiplexer_class(args.navtask, args.solver.task)
m = utils.Foo()
m.tf_graph = tf.Graph()
config = tf.ConfigProto()
config.device_count['GPU'] = 1
with m.tf_graph.as_default():
with tf.device(tf.train.replica_device_setter(args.solver.ps_tasks,
merge_devices=True)):
with tf.container(container_name):
m = args.setup_to_run(m, args, is_training=True,
batch_norm_is_training=True, summary_mode='train')
train_step_kwargs = args.setup_train_step_kwargs(
m, R(), os.path.join(args.logdir, 'train'), rng_seed=args.solver.task,
is_chief=args.solver.task==0,
num_steps=args.navtask.task_params.num_steps*args.navtask.task_params.num_goals, iters=1,
train_display_interval=args.summary.display_interval,
dagger_sample_bn_false=args.arch.dagger_sample_bn_false)
delay_start = (args.solver.task*(args.solver.task+1))/2 * FLAGS.delay_start_iters
logging.error('delaying start for task %d by %d steps.',
args.solver.task, delay_start)
additional_args = {}
final_loss = slim.learning.train(
train_op=m.train_op,
logdir=args.logdir,
master=args.solver.master,
is_chief=args.solver.task == 0,
number_of_steps=args.solver.max_steps,
train_step_fn=tf_utils.train_step_custom_online_sampling,
train_step_kwargs=train_step_kwargs,
global_step=m.global_step_op,
init_op=m.init_op,
init_fn=m.init_fn,
sync_optimizer=m.sync_optimizer,
saver=m.saver_op,
startup_delay_steps=delay_start,
summary_op=None, session_config=config, **additional_args)
def _test(args):
args.solver.master = ''
container_name = ""
checkpoint_dir = os.path.join(format(args.logdir))
logging.error('Checkpoint_dir: %s', args.logdir)
config = tf.ConfigProto();
config.device_count['GPU'] = 1;
m = utils.Foo()
m.tf_graph = tf.Graph()
rng_data_seed = 0; rng_action_seed = 0;
R = lambda: nav_env.get_multiplexer_class(args.navtask, rng_data_seed)
with m.tf_graph.as_default():
with tf.container(container_name):
m = args.setup_to_run(
m, args, is_training=False,
batch_norm_is_training=args.control.force_batchnorm_is_training_at_test,
summary_mode=args.control.test_mode)
train_step_kwargs = args.setup_train_step_kwargs(
m, R(), os.path.join(args.logdir, args.control.test_name),
rng_seed=rng_data_seed, is_chief=True,
num_steps=args.navtask.task_params.num_steps*args.navtask.task_params.num_goals,
iters=args.summary.test_iters, train_display_interval=None,
dagger_sample_bn_false=args.arch.dagger_sample_bn_false)
saver = slim.learning.tf_saver.Saver(variables.get_variables_to_restore())
sv = slim.learning.supervisor.Supervisor(
graph=ops.get_default_graph(), logdir=None, init_op=m.init_op,
summary_op=None, summary_writer=None, global_step=None, saver=m.saver_op)
last_checkpoint = None
reported = False
while True:
last_checkpoint_ = None
while last_checkpoint_ is None:
last_checkpoint_ = slim.evaluation.wait_for_new_checkpoint(
checkpoint_dir, last_checkpoint, seconds_to_sleep=10, timeout=60)
if last_checkpoint_ is None: break
last_checkpoint = last_checkpoint_
checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1])
logging.info('Starting evaluation at %s using checkpoint %s.',
time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
last_checkpoint)
if (args.control.only_eval_when_done == False or
checkpoint_iter >= args.solver.max_steps):
start = time.time()
logging.info('Starting evaluation at %s using checkpoint %s.',
time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
last_checkpoint)
with sv.managed_session(args.solver.master, config=config,
start_standard_services=False) as sess:
sess.run(m.init_op)
sv.saver.restore(sess, last_checkpoint)
sv.start_queue_runners(sess)
if args.control.reset_rng_seed:
train_step_kwargs['rng_data'] = [np.random.RandomState(rng_data_seed),
np.random.RandomState(rng_data_seed)]
train_step_kwargs['rng_action'] = np.random.RandomState(rng_action_seed)
vals, _ = tf_utils.train_step_custom_online_sampling(
sess, None, m.global_step_op, train_step_kwargs,
mode=args.control.test_mode)
should_stop = False
if checkpoint_iter >= args.solver.max_steps:
should_stop = True
if should_stop:
break
if __name__ == '__main__':
app.run()
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""
Code for plotting trajectories in the top view, and for plotting first-person
views from saved trajectories. Does not run the network; it only loads the mesh
data to plot the viewpoints.
CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \
PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_plot_trajectory.py \
--first_person --num_steps 40 \
--config_name cmp.lmap_Msc.clip5.sbpd_d_r2r \
--imset test --alsologtostderr --base_dir output --out_dir vis
"""
import os, sys, numpy as np, copy
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.gridspec import GridSpec
import tensorflow as tf
from tensorflow.contrib import slim
import cv2
import logging
from tensorflow.python.platform import gfile
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
from datasets import nav_env
import scripts.script_nav_agent_release as sna
import src.file_utils as fu
from src import graph_utils
from src import utils
FLAGS = flags.FLAGS
flags.DEFINE_string('out_dir', 'vis', 'Directory where to store the output')
flags.DEFINE_string('type', '', 'Optional type.')
flags.DEFINE_bool('first_person', False, 'Visualize the first person view.')
flags.DEFINE_bool('top_view', False, 'Visualize the trajectory in the top view.')
flags.DEFINE_integer('num_steps', 40, 'Number of steps to run the model for.')
flags.DEFINE_string('imset', 'test', '')
flags.DEFINE_string('base_dir', 'output', 'Cache directory.')
def _get_suffix_str():
return ''
def _load_trajectory():
base_dir = FLAGS.base_dir
config_name = FLAGS.config_name+_get_suffix_str()
dir_name = os.path.join(base_dir, FLAGS.type, config_name)
logging.info('Waiting for snapshot in directory %s.', dir_name)
last_checkpoint = slim.evaluation.wait_for_new_checkpoint(dir_name, None)
checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1])
# Load the distances.
a = utils.load_variables(os.path.join(dir_name, 'bench_on_'+FLAGS.imset,
'all_locs_at_t_{:d}.pkl'.format(checkpoint_iter)))
return a
def _compute_hardness():
# Load the stanford data to compute the hardness.
if FLAGS.type == '':
args = sna.get_args_for_config(FLAGS.config_name+'+bench_'+FLAGS.imset)
else:
args = sna.get_args_for_config(FLAGS.type+'.'+FLAGS.config_name+'+bench_'+FLAGS.imset)
args.navtask.logdir = None
R = lambda: nav_env.get_multiplexer_class(args.navtask, 0)
R = R()
rng_data = [np.random.RandomState(0), np.random.RandomState(0)]
# Sample a room.
h_dists = []
gt_dists = []
for i in range(250):
e = R.sample_env(rng_data)
nodes = e.task.nodes
# Initialize the agent.
init_env_state = e.reset(rng_data)
gt_dist_to_goal = [e.episode.dist_to_goal[0][j][s]
for j, s in enumerate(e.episode.start_node_ids)]
for j in range(args.navtask.task_params.batch_size):
start_node_id = e.episode.start_node_ids[j]
end_node_id =e.episode.goal_node_ids[0][j]
h_dist = graph_utils.heuristic_fn_vec(
nodes[[start_node_id],:], nodes[[end_node_id], :],
n_ori=args.navtask.task_params.n_ori,
step_size=args.navtask.task_params.step_size)[0][0]
gt_dist = e.episode.dist_to_goal[0][j][start_node_id]
h_dists.append(h_dist)
gt_dists.append(gt_dist)
h_dists = np.array(h_dists)
gt_dists = np.array(gt_dists)
e = R.sample_env([np.random.RandomState(0), np.random.RandomState(0)])
input = e.get_common_data()
orig_maps = input['orig_maps'][0,0,:,:,0]
return h_dists, gt_dists, orig_maps
def plot_trajectory_first_person(dt, orig_maps, out_dir):
out_dir = os.path.join(out_dir, FLAGS.config_name+_get_suffix_str(),
FLAGS.imset)
fu.makedirs(out_dir)
# Load the model so that we can render.
plt.set_cmap('gray')
samples_per_action = 8; wait_at_action = 0;
Writer = animation.writers['mencoder']
writer = Writer(fps=3*(samples_per_action+wait_at_action),
metadata=dict(artist='anonymous'), bitrate=1800)
args = sna.get_args_for_config(FLAGS.config_name + '+bench_'+FLAGS.imset)
args.navtask.logdir = None
navtask_ = copy.deepcopy(args.navtask)
navtask_.camera_param.modalities = ['rgb']
navtask_.task_params.modalities = ['rgb']
sz = 512
navtask_.camera_param.height = sz
navtask_.camera_param.width = sz
navtask_.task_params.img_height = sz
navtask_.task_params.img_width = sz
R = lambda: nav_env.get_multiplexer_class(navtask_, 0)
R = R()
b = R.buildings[0]
f = [0 for _ in range(wait_at_action)] + \
[float(_)/samples_per_action for _ in range(samples_per_action)];
# Generate things for it to render.
inds_to_do = []
inds_to_do += [1, 4, 10] #1291, 1268, 1273, 1289, 1302, 1426, 1413, 1449, 1399, 1390]
for i in inds_to_do:
fig = plt.figure(figsize=(10,8))
gs = GridSpec(3,4)
gs.update(wspace=0.05, hspace=0.05, left=0.0, top=0.97, right=1.0, bottom=0.)
ax = fig.add_subplot(gs[:,:-1])
ax1 = fig.add_subplot(gs[0,-1])
ax2 = fig.add_subplot(gs[1,-1])
ax3 = fig.add_subplot(gs[2,-1])
axes = [ax, ax1, ax2, ax3]
# ax = fig.add_subplot(gs[:,:])
# axes = [ax]
for ax in axes:
ax.set_axis_off()
node_ids = dt['all_node_ids'][i, :, 0]*1
# Prune so that last node is not repeated more than 3 times?
if np.all(node_ids[-4:] == node_ids[-1]):
while node_ids[-4] == node_ids[-1]:
node_ids = node_ids[:-1]
num_steps = np.minimum(FLAGS.num_steps, len(node_ids))
xyt = b.to_actual_xyt_vec(b.task.nodes[node_ids])
xyt_diff = xyt[1:,:] - xyt[:-1:,:]
xyt_diff[:,2] = np.mod(xyt_diff[:,2], 4)
ind = np.where(xyt_diff[:,2] == 3)[0]
xyt_diff[ind, 2] = -1
xyt_diff = np.expand_dims(xyt_diff, axis=1)
to_cat = [xyt_diff*_ for _ in f]
perturbs_all = np.concatenate(to_cat, axis=1)
perturbs_all = np.concatenate([perturbs_all, np.zeros_like(perturbs_all[:,:,:1])], axis=2)
node_ids_all = np.expand_dims(node_ids, axis=1)*1
node_ids_all = np.concatenate([node_ids_all for _ in f], axis=1)
node_ids_all = np.reshape(node_ids_all[:-1,:], -1)
perturbs_all = np.reshape(perturbs_all, [-1, 4])
imgs = b.render_nodes(b.task.nodes[node_ids_all,:], perturb=perturbs_all)
# Get action at each node.
actions = []
_, action_to_nodes = b.get_feasible_actions(node_ids)
for j in range(num_steps-1):
action_to_node = action_to_nodes[j]
node_to_action = dict(zip(action_to_node.values(), action_to_node.keys()))
actions.append(node_to_action[node_ids[j+1]])
def init_fn():
return fig,
gt_dist_to_goal = []
# Render trajectories.
def worker(j):
# Plot the image.
step_number = j/(samples_per_action + wait_at_action)
img = imgs[j]; ax = axes[0]; ax.clear(); ax.set_axis_off();
img = img.astype(np.uint8); ax.imshow(img);
tt = ax.set_title(
"First Person View\n" +
"Top corners show diagnostics (distance, agents' action) not input to agent.",
fontsize=12)
plt.setp(tt, color='white')
# Distance to goal.
t = 'Dist to Goal:\n{:2d} steps'.format(int(dt['all_d_at_t'][i, step_number]))
t = ax.text(0.01, 0.99, t,
horizontalalignment='left',
verticalalignment='top',
fontsize=20, color='red',
transform=ax.transAxes, alpha=1.0)
t.set_bbox(dict(color='white', alpha=0.85, pad=-0.1))
# Action to take.
action_latex = ['$\odot$ ', '$\curvearrowright$ ', '$\curvearrowleft$ ', '$\Uparrow$ ']
t = ax.text(0.99, 0.99, action_latex[actions[step_number]],
horizontalalignment='right',
verticalalignment='top',
fontsize=40, color='green',
transform=ax.transAxes, alpha=1.0)
t.set_bbox(dict(color='white', alpha=0.85, pad=-0.1))
# Plot the map top view.
ax = axes[-1]
if j == 0:
# Plot the map
locs = dt['all_locs'][i,:num_steps,:]
goal_loc = dt['all_goal_locs'][i,:,:]
xymin = np.minimum(np.min(goal_loc, axis=0), np.min(locs, axis=0))
xymax = np.maximum(np.max(goal_loc, axis=0), np.max(locs, axis=0))
xy1 = (xymax+xymin)/2. - 0.7*np.maximum(np.max(xymax-xymin), 24)
xy2 = (xymax+xymin)/2. + 0.7*np.maximum(np.max(xymax-xymin), 24)
ax.set_axis_on()
ax.patch.set_facecolor((0.333, 0.333, 0.333))
ax.set_xticks([]); ax.set_yticks([]);
ax.imshow(orig_maps, origin='lower', vmin=-1.0, vmax=2.0)
ax.plot(goal_loc[:,0], goal_loc[:,1], 'g*', markersize=12)
locs = dt['all_locs'][i,:1,:]
ax.plot(locs[:,0], locs[:,1], 'b.', markersize=12)
ax.set_xlim([xy1[0], xy2[0]])
ax.set_ylim([xy1[1], xy2[1]])
locs = dt['all_locs'][i,step_number,:]
locs = np.expand_dims(locs, axis=0)
ax.plot(locs[:,0], locs[:,1], 'r.', alpha=1.0, linewidth=0, markersize=4)
tt = ax.set_title('Trajectory in topview', fontsize=14)
plt.setp(tt, color='white')
return fig,
line_ani = animation.FuncAnimation(fig, worker,
(num_steps-1)*(wait_at_action+samples_per_action),
interval=500, blit=True, init_func=init_fn)
tmp_file_name = 'tmp.mp4'
line_ani.save(tmp_file_name, writer=writer, savefig_kwargs={'facecolor':'black'})
out_file_name = os.path.join(out_dir, 'vis_{:04d}.mp4'.format(i))
print out_file_name
if fu.exists(out_file_name):
gfile.Remove(out_file_name)
gfile.Copy(tmp_file_name, out_file_name)
gfile.Remove(tmp_file_name)
plt.close(fig)
def plot_trajectory(dt, hardness, orig_maps, out_dir):
out_dir = os.path.join(out_dir, FLAGS.config_name+_get_suffix_str(),
FLAGS.imset)
fu.makedirs(out_dir)
out_file = os.path.join(out_dir, 'all_locs_at_t.pkl')
dt['hardness'] = hardness
utils.save_variables(out_file, dt.values(), dt.keys(), overwrite=True)
# Plot trajectories onto the maps.
plt.set_cmap('gray')
for i in range(4000):
goal_loc = dt['all_goal_locs'][i, :, :]
locs = np.concatenate((dt['all_locs'][i,:,:],
dt['all_locs'][i,:,:]), axis=0)
xymin = np.minimum(np.min(goal_loc, axis=0), np.min(locs, axis=0))
xymax = np.maximum(np.max(goal_loc, axis=0), np.max(locs, axis=0))
xy1 = (xymax+xymin)/2. - 1.*np.maximum(np.max(xymax-xymin), 24)
xy2 = (xymax+xymin)/2. + 1.*np.maximum(np.max(xymax-xymin), 24)
fig, ax = utils.tight_imshow_figure(plt, figsize=(6,6))
ax.set_axis_on()
ax.patch.set_facecolor((0.333, 0.333, 0.333))
ax.set_xticks([])
ax.set_yticks([])
all_locs = dt['all_locs'][i,:,:]*1
uniq = np.where(np.any(all_locs[1:,:] != all_locs[:-1,:], axis=1))[0]+1
uniq = np.sort(uniq).tolist()
uniq.insert(0,0)
uniq = np.array(uniq)
all_locs = all_locs[uniq, :]
ax.plot(dt['all_locs'][i, 0, 0],
dt['all_locs'][i, 0, 1], 'b.', markersize=24)
ax.plot(dt['all_goal_locs'][i, 0, 0],
dt['all_goal_locs'][i, 0, 1], 'g*', markersize=19)
ax.plot(all_locs[:,0], all_locs[:,1], 'r', alpha=0.4, linewidth=2)
ax.scatter(all_locs[:,0], all_locs[:,1],
c=5+np.arange(all_locs.shape[0])*1./all_locs.shape[0],
cmap='Reds', s=30, linewidth=0)
ax.imshow(orig_maps, origin='lower', vmin=-1.0, vmax=2.0, aspect='equal')
ax.set_xlim([xy1[0], xy2[0]])
ax.set_ylim([xy1[1], xy2[1]])
file_name = os.path.join(out_dir, 'trajectory_{:04d}.png'.format(i))
print file_name
with fu.fopen(file_name, 'w') as f:
plt.savefig(f)
plt.close(fig)
def main(_):
a = _load_trajectory()
h_dists, gt_dists, orig_maps = _compute_hardness()
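# Hardness of an episode: 1 - (heuristic distance / true geodesic distance).
# Values near 1 mean the heuristic badly underestimates the actual path
# length (a hard episode); values near 0 mean the goal is almost directly
# reachable.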
hardness = 1.-h_dists*1./ gt_dists
if FLAGS.top_view:
plot_trajectory(a, hardness, orig_maps, out_dir=FLAGS.out_dir)
if FLAGS.first_person:
plot_trajectory_first_person(a, orig_maps, out_dir=FLAGS.out_dir)
if __name__ == '__main__':
app.run()
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os
import glob
import numpy as np
import logging
import cPickle
from datasets import nav_env
from datasets import factory
from src import utils
from src import map_utils as mu
logging.basicConfig(level=logging.INFO)
DATA_DIR = 'data/stanford_building_parser_dataset_raw/'
mkdir_if_missing = utils.mkdir_if_missing
save_variables = utils.save_variables
def _get_semantic_maps(building_name, transform, map_, flip, cats):
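# Build one top-view semantic map per category: for every room in the
# building, project the point cloud of each instance whose class is in
# `cats` onto the building's 2D map grid (mu._project_to_map) and accumulate
# the per-category hit counts.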
rooms = get_room_in_building(building_name)
maps = []
for cat in cats:
maps.append(np.zeros((map_.size[1], map_.size[0])))
for r in rooms:
room = load_room(building_name, r, category_list=cats)
classes = room['class_id']
for i, cat in enumerate(cats):
c_ind = cats.index(cat)
ind = [_ for _, c in enumerate(classes) if c == c_ind]
if len(ind) > 0:
vs = [room['vertexs'][x]*1 for x in ind]
vs = np.concatenate(vs, axis=0)
if transform:
vs = np.array([vs[:,1], vs[:,0], vs[:,2]]).T
vs[:,0] = -vs[:,0]
vs[:,1] += 4.20
vs[:,0] += 6.20
vs = vs*100.
if flip:
vs[:,1] = -vs[:,1]
maps[i] = maps[i] + \
mu._project_to_map(map_, vs, ignore_points_outside_map=True)
return maps
def _map_building_name(building_name):
b = int(building_name.split('_')[0][4])
out_name = 'Area_{:d}'.format(b)
if b == 5:
if int(building_name.split('_')[0][5]) == 1:
transform = True
else:
transform = False
else:
transform = False
return out_name, transform
def get_categories():
cats = ['beam', 'board', 'bookcase', 'ceiling', 'chair', 'clutter', 'column',
'door', 'floor', 'sofa', 'table', 'wall', 'window']
return cats
def _write_map_files(b_in, b_out, transform):
cats = get_categories()
env = utils.Foo(padding=10, resolution=5, num_point_threshold=2,
valid_min=-10, valid_max=200, n_samples_per_face=200)
robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120,
camera_elevation_degree=-15)
building_loader = factory.get_dataset('sbpd')
for flip in [False, True]:
b = nav_env.Building(b_out, robot, env, flip=flip,
building_loader=building_loader)
logging.info("building_in: %s, building_out: %s, transform: %d", b_in,
b_out, transform)
maps = _get_semantic_maps(b_in, transform, b.map, flip, cats)
maps = np.transpose(np.array(maps), axes=[1,2,0])
# Load file from the cache.
file_name = '{:s}_{:d}_{:d}_{:d}_{:d}_{:d}_{:d}.pkl'
file_name = file_name.format(b.building_name, b.map.size[0], b.map.size[1],
b.map.origin[0], b.map.origin[1],
b.map.resolution, flip)
out_file = os.path.join(DATA_DIR, 'processing', 'class-maps', file_name)
logging.info('Writing semantic maps to %s.', out_file)
save_variables(out_file, [maps, cats], ['maps', 'cats'], overwrite=True)
def _transform_area5b(room_dimension):
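# Area_5b is annotated in a different coordinate frame than Area_5a: swap
# the x/y extents, negate x, and apply the fixed offsets (+4.20 in y,
# +6.20 in x), mirroring the per-vertex transform applied in
# _get_semantic_maps.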
for a in room_dimension.keys():
r = room_dimension[a]*1
r[[0,1,3,4]] = r[[1,0,4,3]]
r[[0,3]] = -r[[3,0]]
r[[1,4]] += 4.20
r[[0,3]] += 6.20
room_dimension[a] = r
return room_dimension
def collect_room(building_name, room_name):
room_dir = os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2', building_name,
room_name, 'Annotations')
files = glob.glob1(room_dir, '*.txt')
files = sorted(files, key=lambda s: s.lower())
vertexs = []; colors = [];
for f in files:
file_name = os.path.join(room_dir, f)
logging.info(' %s', file_name)
a = np.loadtxt(file_name)
vertex = a[:,:3]*1.
color = a[:,3:]*1
color = color.astype(np.uint8)
vertexs.append(vertex)
colors.append(color)
files = [f.split('.')[0] for f in files]
out = {'vertexs': vertexs, 'colors': colors, 'names': files}
return out
def load_room(building_name, room_name, category_list=None):
room = collect_room(building_name, room_name)
room['building_name'] = building_name
room['room_name'] = room_name
instance_id = range(len(room['names']))
room['instance_id'] = instance_id
if category_list is not None:
name = [r.split('_')[0] for r in room['names']]
class_id = []
for n in name:
if n in category_list:
class_id.append(category_list.index(n))
else:
class_id.append(len(category_list))
room['class_id'] = class_id
room['category_list'] = category_list
return room
def get_room_in_building(building_name):
building_dir = os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2', building_name)
rn = os.listdir(building_dir)
rn = [x for x in rn if os.path.isdir(os.path.join(building_dir, x))]
rn = sorted(rn, key=lambda s: s.lower())
return rn
def write_room_dimensions(b_in, b_out, transform):
rooms = get_room_in_building(b_in)
room_dimension = {}
for r in rooms:
room = load_room(b_in, r, category_list=None)
vertex = np.concatenate(room['vertexs'], axis=0)
room_dimension[r] = np.concatenate((np.min(vertex, axis=0), np.max(vertex, axis=0)), axis=0)
if transform == 1:
room_dimension = _transform_area5b(room_dimension)
out_file = os.path.join(DATA_DIR, 'processing', 'room-dimension', b_out+'.pkl')
save_variables(out_file, [room_dimension], ['room_dimension'], overwrite=True)
def write_room_dimensions_all(I):
mkdir_if_missing(os.path.join(DATA_DIR, 'processing', 'room-dimension'))
bs_in = ['Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_5', 'Area_6']
bs_out = ['area1', 'area2', 'area3', 'area4', 'area5a', 'area5b', 'area6']
transforms = [0, 0, 0, 0, 0, 1, 0]
for i in I:
b_in = bs_in[i]
b_out = bs_out[i]
t = transforms[i]
write_room_dimensions(b_in, b_out, t)
def write_class_maps_all(I):
mkdir_if_missing(os.path.join(DATA_DIR, 'processing', 'class-maps'))
bs_in = ['Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_5', 'Area_6']
bs_out = ['area1', 'area2', 'area3', 'area4', 'area5a', 'area5b', 'area6']
transforms = [0, 0, 0, 0, 0, 1, 0]
for i in I:
b_in = bs_in[i]
b_out = bs_out[i]
t = transforms[i]
_write_map_files(b_in, b_out, t)
if __name__ == '__main__':
write_room_dimensions_all([0, 2, 3, 4, 5, 6])
write_class_maps_all([0, 2, 3, 4, 5, 6])
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
cd data/stanford_building_parser_dataset_raw
unzip Stanford3dDataset_v1.2.zip
cd ../../
PYOPENGL_PLATFORM=egl PYTHONPATH='.' python scripts/script_preprocess_annoations_S3DIS.py
mv data/stanford_building_parser_dataset_raw/processing/room-dimension data/stanford_building_parser_dataset/.
mv data/stanford_building_parser_dataset_raw/processing/class-maps data/stanford_building_parser_dataset/.
echo "You may now delete data/stanford_building_parser_dataset_raw if needed."