ModelZoo / ResNet50_tensorflow · Commit 44fa1d37

Authored Jun 29, 2017 by Alex Lee

    Merge remote-tracking branch 'upstream/master'

Parents: d3628a74, 6e367f67

Showing 20 of 293 changed files, with 3703 additions and 0 deletions (+3703 −0):
attention_ocr/python/model_test.py                               +181  −0
attention_ocr/python/sequence_layers.py                          +422  −0
attention_ocr/python/sequence_layers_test.py                     +112  −0
attention_ocr/python/train.py                                    +209  −0
attention_ocr/python/utils.py                                     +80  −0
cognitive_mapping_and_planning/.gitignore                          +4  −0
cognitive_mapping_and_planning/README.md                         +122  −0
cognitive_mapping_and_planning/__init__.py                         +0  −0
cognitive_mapping_and_planning/cfgs/__init__.py                    +0  −0
cognitive_mapping_and_planning/cfgs/config_cmp.py                +283  −0
cognitive_mapping_and_planning/cfgs/config_common.py             +261  −0
cognitive_mapping_and_planning/cfgs/config_distill.py            +114  −0
cognitive_mapping_and_planning/cfgs/config_vision_baseline.py    +173  −0
cognitive_mapping_and_planning/data/.gitignore                     +3  −0
cognitive_mapping_and_planning/data/README.md                     +33  −0
cognitive_mapping_and_planning/datasets/__init__.py                +0  −0
cognitive_mapping_and_planning/datasets/factory.py               +113  −0
cognitive_mapping_and_planning/datasets/nav_env.py              +1465  −0
cognitive_mapping_and_planning/datasets/nav_env_config.py        +127  −0
cognitive_mapping_and_planning/matplotlibrc                        +1  −0
attention_ocr/python/model_test.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the model."""
import
numpy
as
np
import
string
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
from
tensorflow.contrib.tfprof
import
model_analyzer
import
model
import
data_provider
def
create_fake_charset
(
num_char_classes
):
charset
=
{}
for
i
in
xrange
(
num_char_classes
):
charset
[
i
]
=
string
.
printable
[
i
%
len
(
string
.
printable
)]
return
charset
class
ModelTest
(
tf
.
test
.
TestCase
):
def
setUp
(
self
):
tf
.
test
.
TestCase
.
setUp
(
self
)
self
.
rng
=
np
.
random
.
RandomState
([
11
,
23
,
50
])
self
.
batch_size
=
4
self
.
image_width
=
600
self
.
image_height
=
30
self
.
seq_length
=
40
self
.
num_char_classes
=
72
self
.
null_code
=
62
self
.
num_views
=
4
feature_size
=
288
self
.
conv_tower_shape
=
(
self
.
batch_size
,
1
,
72
,
feature_size
)
self
.
features_shape
=
(
self
.
batch_size
,
self
.
seq_length
,
feature_size
)
self
.
chars_logit_shape
=
(
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
)
self
.
length_logit_shape
=
(
self
.
batch_size
,
self
.
seq_length
+
1
)
self
.
initialize_fakes
()
def
initialize_fakes
(
self
):
self
.
images_shape
=
(
self
.
batch_size
,
self
.
image_height
,
self
.
image_width
,
3
)
self
.
fake_images
=
tf
.
constant
(
self
.
rng
.
randint
(
low
=
0
,
high
=
255
,
size
=
self
.
images_shape
).
astype
(
'float32'
),
name
=
'input_node'
)
self
.
fake_conv_tower_np
=
tf
.
constant
(
self
.
rng
.
randn
(
*
self
.
conv_tower_shape
).
astype
(
'float32'
))
self
.
fake_logits
=
tf
.
constant
(
self
.
rng
.
randn
(
*
self
.
chars_logit_shape
).
astype
(
'float32'
))
self
.
fake_labels
=
tf
.
constant
(
self
.
rng
.
randint
(
low
=
0
,
high
=
self
.
num_char_classes
,
size
=
(
self
.
batch_size
,
self
.
seq_length
)).
astype
(
'int64'
))
def
create_model
(
self
):
return
model
.
Model
(
self
.
num_char_classes
,
self
.
seq_length
,
num_views
=
4
,
null_code
=
62
)
def
test_char_related_shapes
(
self
):
ocr_model
=
self
.
create_model
()
with
self
.
test_session
()
as
sess
:
endpoints_tf
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
sess
.
run
(
tf
.
global_variables_initializer
())
endpoints
=
sess
.
run
(
endpoints_tf
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_logit
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_log_prob
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
),
endpoints
.
predicted_chars
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
),
endpoints
.
predicted_scores
.
shape
)
def
test_predicted_scores_are_within_range
(
self
):
ocr_model
=
self
.
create_model
()
_
,
_
,
scores
=
ocr_model
.
char_predictions
(
self
.
fake_logits
)
with
self
.
test_session
()
as
sess
:
scores_np
=
sess
.
run
(
scores
)
values_in_range
=
(
scores_np
>=
0.0
)
&
(
scores_np
<=
1.0
)
self
.
assertTrue
(
np
.
all
(
values_in_range
),
msg
=
(
'Scores contains out of the range values %s'
%
scores_np
[
np
.
logical_not
(
values_in_range
)]))
def
test_conv_tower_shape
(
self
):
with
self
.
test_session
()
as
sess
:
ocr_model
=
self
.
create_model
()
conv_tower
=
ocr_model
.
conv_tower_fn
(
self
.
fake_images
)
sess
.
run
(
tf
.
global_variables_initializer
())
conv_tower_np
=
sess
.
run
(
conv_tower
)
self
.
assertEqual
(
self
.
conv_tower_shape
,
conv_tower_np
.
shape
)
def
test_model_size_less_then1_gb
(
self
):
# NOTE: Actual amount of memory occupied my TF during training will be at
# least 4X times bigger because of space need to store original weights,
# updates, gradients and variances. It also depends on the type of used
# optimizer.
ocr_model
=
self
.
create_model
()
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
with
self
.
test_session
()
as
sess
:
tfprof_root
=
model_analyzer
.
print_model_analysis
(
sess
.
graph
,
tfprof_options
=
model_analyzer
.
TRAINABLE_VARS_PARAMS_STAT_OPTIONS
)
model_size_bytes
=
4
*
tfprof_root
.
total_parameters
self
.
assertLess
(
model_size_bytes
,
1
*
2
**
30
)
def
test_create_summaries_is_runnable
(
self
):
ocr_model
=
self
.
create_model
()
data
=
data_provider
.
InputEndpoints
(
images
=
self
.
fake_images
,
images_orig
=
self
.
fake_images
,
labels
=
self
.
fake_labels
,
labels_one_hot
=
slim
.
one_hot_encoding
(
self
.
fake_labels
,
self
.
num_char_classes
))
endpoints
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
charset
=
create_fake_charset
(
self
.
num_char_classes
)
summaries
=
ocr_model
.
create_summaries
(
data
,
endpoints
,
charset
,
is_training
=
False
)
with
self
.
test_session
()
as
sess
:
sess
.
run
(
tf
.
global_variables_initializer
())
sess
.
run
(
tf
.
local_variables_initializer
())
tf
.
tables_initializer
().
run
()
sess
.
run
(
summaries
)
# just check it is runnable
def
test_sequence_loss_function_without_label_smoothing
(
self
):
model
=
self
.
create_model
()
model
.
set_mparam
(
'sequence_loss_fn'
,
label_smoothing
=
0
)
loss
=
model
.
sequence_loss_fn
(
self
.
fake_logits
,
self
.
fake_labels
)
with
self
.
test_session
()
as
sess
:
loss_np
=
sess
.
run
(
loss
)
# This test checks that the loss function is 'runnable'.
self
.
assertEqual
(
loss_np
.
shape
,
tuple
())
class
CharsetMapperTest
(
tf
.
test
.
TestCase
):
def
test_text_corresponds_to_ids
(
self
):
charset
=
create_fake_charset
(
36
)
ids
=
tf
.
constant
(
[[
17
,
14
,
21
,
21
,
24
],
[
32
,
24
,
27
,
21
,
13
]],
dtype
=
tf
.
int64
)
charset_mapper
=
model
.
CharsetMapper
(
charset
)
with
self
.
test_session
()
as
sess
:
tf
.
tables_initializer
().
run
()
text
=
sess
.
run
(
charset_mapper
.
get_text
(
ids
))
self
.
assertAllEqual
(
text
,
[
'hello'
,
'world'
])
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
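An aside on why CharsetMapperTest expects 'hello' and 'world': create_fake_charset maps index i to string.printable[i], and string.printable begins with the ten digits followed by the lowercase alphabet, so small ids map directly onto letters. A quick standalone check (an editor's sketch, not part of the commit):

import string

charset = {i: string.printable[i % len(string.printable)] for i in range(36)}
print(''.join(charset[i] for i in [17, 14, 21, 21, 24]))  # hello
print(''.join(charset[i] for i in [32, 24, 27, 21, 13]))  # world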
attention_ocr/python/sequence_layers.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various implementations of sequence layers for character prediction.
A 'sequence layer' is a part of a computation graph which is responsible of
producing a sequence of characters using extracted image features. There are
many reasonable ways to implement such layers. All of them are using RNNs.
This module provides implementations which uses 'attention' mechanism to
spatially 'pool' image features and also can use a previously predicted
character to predict the next (aka auto regression).
Usage:
Select one of available classes, e.g. Attention or use a wrapper function to
pick one based on your requirements:
layer_class = sequence_layers.get_layer_class(use_attention=True,
use_autoregression=True)
layer = layer_class(net, labels_one_hot, model_params, method_params)
char_logits = layer.create_logits()
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
collections
import
abc
import
logging
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
def
orthogonal_initializer
(
shape
,
dtype
=
tf
.
float32
,
*
args
,
**
kwargs
):
"""Generates orthonormal matrices with random values.
Orthonormal initialization is important for RNNs:
http://arxiv.org/abs/1312.6120
http://smerity.com/articles/2016/orthogonal_init.html
For non-square shapes the returned matrix will be semi-orthonormal: if the
number of columns exceeds the number of rows, then the rows are orthonormal
vectors; but if the number of rows exceeds the number of columns, then the
columns are orthonormal vectors.
We use SVD decomposition to generate an orthonormal matrix with random
values. The same way as it is done in the Lasagne library for Theano. Note
that both u and v returned by the svd are orthogonal and random. We just need
to pick one with the right shape.
Args:
shape: a shape of the tensor matrix to initialize.
dtype: a dtype of the initialized tensor.
*args: not used.
**kwargs: not used.
Returns:
An initialized tensor.
"""
del
args
del
kwargs
flat_shape
=
(
shape
[
0
],
np
.
prod
(
shape
[
1
:]))
w
=
np
.
random
.
randn
(
*
flat_shape
)
u
,
_
,
v
=
np
.
linalg
.
svd
(
w
,
full_matrices
=
False
)
w
=
u
if
u
.
shape
==
flat_shape
else
v
return
tf
.
constant
(
w
.
reshape
(
shape
),
dtype
=
dtype
)
SequenceLayerParams
=
collections
.
namedtuple
(
'SequenceLogitsParams'
,
[
'num_lstm_units'
,
'weight_decay'
,
'lstm_state_clip_value'
])
class
SequenceLayerBase
(
object
):
"""A base abstruct class for all sequence layers.
A child class has to define following methods:
get_train_input
get_eval_input
unroll_cell
"""
__metaclass__
=
abc
.
ABCMeta
def
__init__
(
self
,
net
,
labels_one_hot
,
model_params
,
method_params
):
"""Stores argument in member variable for further use.
Args:
net: A tensor with shape [batch_size, num_features, feature_size] which
contains some extracted image features.
labels_one_hot: An optional (can be None) ground truth labels for the
input features. Is a tensor with shape
[batch_size, seq_length, num_char_classes]
model_params: A namedtuple with model parameters (model.ModelParams).
method_params: A SequenceLayerParams instance.
"""
self
.
_params
=
model_params
self
.
_mparams
=
method_params
self
.
_net
=
net
self
.
_labels_one_hot
=
labels_one_hot
self
.
_batch_size
=
net
.
get_shape
().
dims
[
0
].
value
# Initialize parameters for char logits which will be computed on the fly
# inside an LSTM decoder.
self
.
_char_logits
=
{}
regularizer
=
slim
.
l2_regularizer
(
self
.
_mparams
.
weight_decay
)
self
.
_softmax_w
=
slim
.
model_variable
(
'softmax_w'
,
[
self
.
_mparams
.
num_lstm_units
,
self
.
_params
.
num_char_classes
],
initializer
=
orthogonal_initializer
,
regularizer
=
regularizer
)
self
.
_softmax_b
=
slim
.
model_variable
(
'softmax_b'
,
[
self
.
_params
.
num_char_classes
],
initializer
=
tf
.
zeros_initializer
(),
regularizer
=
regularizer
)
@
abc
.
abstractmethod
def
get_train_input
(
self
,
prev
,
i
):
"""Returns a sample to be used to predict a character during training.
This function is used as a loop_function for an RNN decoder.
Args:
prev: output tensor from previous step of the RNN. A tensor with shape:
[batch_size, num_char_classes].
i: index of a character in the output sequence.
Returns:
A tensor with shape [batch_size, ?] - depth depends on implementation
details.
"""
pass
@
abc
.
abstractmethod
def
get_eval_input
(
self
,
prev
,
i
):
"""Returns a sample to be used to predict a character during inference.
This function is used as a loop_function for an RNN decoder.
Args:
prev: output tensor from previous step of the RNN. A tensor with shape:
[batch_size, num_char_classes].
i: index of a character in the output sequence.
Returns:
A tensor with shape [batch_size, ?] - depth depends on implementation
details.
"""
raise
AssertionError
(
'Not implemented'
)
@
abc
.
abstractmethod
def
unroll_cell
(
self
,
decoder_inputs
,
initial_state
,
loop_function
,
cell
):
"""Unrolls an RNN cell for all inputs.
This is a placeholder to call some RNN decoder. It has a similar to
tf.seq2seq.rnn_decode interface.
Args:
decoder_inputs: A list of 2D Tensors* [batch_size x input_size]. In fact,
most of existing decoders in presence of a loop_function use only the
first element to determine batch_size and length of the list to
determine number of steps.
initial_state: 2D Tensor with shape [batch_size x cell.state_size].
loop_function: function will be applied to the i-th output in order to
generate the i+1-st input (see self.get_input).
cell: rnn_cell.RNNCell defining the cell function and size.
Returns:
A tuple of the form (outputs, state), where:
outputs: A list of character logits of the same length as
decoder_inputs of 2D Tensors with shape [batch_size x num_characters].
state: The state of each cell at the final time-step.
It is a 2D Tensor of shape [batch_size x cell.state_size].
"""
pass
def
is_training
(
self
):
"""Returns True if the layer is created for training stage."""
return
self
.
_labels_one_hot
is
not
None
def
char_logit
(
self
,
inputs
,
char_index
):
"""Creates logits for a character if required.
Args:
inputs: A tensor with shape [batch_size, ?] (depth is implementation
dependent).
char_index: A integer index of a character in the output sequence.
Returns:
A tensor with shape [batch_size, num_char_classes]
"""
if
char_index
not
in
self
.
_char_logits
:
self
.
_char_logits
[
char_index
]
=
tf
.
nn
.
xw_plus_b
(
inputs
,
self
.
_softmax_w
,
self
.
_softmax_b
)
return
self
.
_char_logits
[
char_index
]
def
char_one_hot
(
self
,
logit
):
"""Creates one hot encoding for a logit of a character.
Args:
logit: A tensor with shape [batch_size, num_char_classes].
Returns:
A tensor with shape [batch_size, num_char_classes]
"""
prediction
=
tf
.
argmax
(
logit
,
dimension
=
1
)
return
slim
.
one_hot_encoding
(
prediction
,
self
.
_params
.
num_char_classes
)
def
get_input
(
self
,
prev
,
i
):
"""A wrapper for get_train_input and get_eval_input.
Args:
prev: output tensor from previous step of the RNN. A tensor with shape:
[batch_size, num_char_classes].
i: index of a character in the output sequence.
Returns:
A tensor with shape [batch_size, ?] - depth depends on implementation
details.
"""
if
self
.
is_training
():
return
self
.
get_train_input
(
prev
,
i
)
else
:
return
self
.
get_eval_input
(
prev
,
i
)
def
create_logits
(
self
):
"""Creates character sequence logits for a net specified in the constructor.
A "main" method for the sequence layer which glues together all pieces.
Returns:
A tensor with shape [batch_size, seq_length, num_char_classes].
"""
with
tf
.
variable_scope
(
'LSTM'
):
first_label
=
self
.
get_input
(
prev
=
None
,
i
=
0
)
decoder_inputs
=
[
first_label
]
+
[
None
]
*
(
self
.
_params
.
seq_length
-
1
)
lstm_cell
=
tf
.
contrib
.
rnn
.
LSTMCell
(
self
.
_mparams
.
num_lstm_units
,
use_peepholes
=
False
,
cell_clip
=
self
.
_mparams
.
lstm_state_clip_value
,
state_is_tuple
=
True
,
initializer
=
orthogonal_initializer
)
lstm_outputs
,
_
=
self
.
unroll_cell
(
decoder_inputs
=
decoder_inputs
,
initial_state
=
lstm_cell
.
zero_state
(
self
.
_batch_size
,
tf
.
float32
),
loop_function
=
self
.
get_input
,
cell
=
lstm_cell
)
with
tf
.
variable_scope
(
'logits'
):
logits_list
=
[
tf
.
expand_dims
(
self
.
char_logit
(
logit
,
i
),
dim
=
1
)
for
i
,
logit
in
enumerate
(
lstm_outputs
)
]
return
tf
.
concat
(
logits_list
,
1
)
class
NetSlice
(
SequenceLayerBase
):
"""A layer which uses a subset of image features to predict each character.
"""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
NetSlice
,
self
).
__init__
(
*
args
,
**
kwargs
)
self
.
_zero_label
=
tf
.
zeros
(
[
self
.
_batch_size
,
self
.
_params
.
num_char_classes
])
def
get_image_feature
(
self
,
char_index
):
"""Returns a subset of image features for a character.
Args:
char_index: an index of a character.
Returns:
A tensor with shape [batch_size, ?]. The output depth depends on the
depth of input net.
"""
batch_size
,
features_num
,
_
=
[
d
.
value
for
d
in
self
.
_net
.
get_shape
()]
slice_len
=
int
(
features_num
/
self
.
_params
.
seq_length
)
# In case when features_num != seq_length, we just pick a subset of image
# features, this choice is arbitrary and there is no intuitive geometrical
# interpretation. If features_num is not dividable by seq_length there will
# be unused image features.
net_slice
=
self
.
_net
[:,
char_index
:
char_index
+
slice_len
,
:]
feature
=
tf
.
reshape
(
net_slice
,
[
batch_size
,
-
1
])
logging
.
debug
(
'Image feature: %s'
,
feature
)
return
feature
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
del
prev
return
self
.
get_image_feature
(
i
)
def
get_train_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_train_input for details."""
return
self
.
get_eval_input
(
prev
,
i
)
def
unroll_cell
(
self
,
decoder_inputs
,
initial_state
,
loop_function
,
cell
):
"""See SequenceLayerBase.unroll_cell for details."""
return
tf
.
contrib
.
legacy_seq2seq
.
rnn_decoder
(
decoder_inputs
=
decoder_inputs
,
initial_state
=
initial_state
,
cell
=
cell
,
loop_function
=
self
.
get_input
)
class
NetSliceWithAutoregression
(
NetSlice
):
"""A layer similar to NetSlice, but it also uses auto regression.
The "auto regression" means that we use network output for previous character
as a part of input for the current character.
"""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
NetSliceWithAutoregression
,
self
).
__init__
(
*
args
,
**
kwargs
)
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
if
i
==
0
:
prev
=
self
.
_zero_label
else
:
logit
=
self
.
char_logit
(
prev
,
char_index
=
i
-
1
)
prev
=
self
.
char_one_hot
(
logit
)
image_feature
=
self
.
get_image_feature
(
char_index
=
i
)
return
tf
.
concat
([
image_feature
,
prev
],
1
)
def
get_train_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_train_input for details."""
if
i
==
0
:
prev
=
self
.
_zero_label
else
:
prev
=
self
.
_labels_one_hot
[:,
i
-
1
,
:]
image_feature
=
self
.
get_image_feature
(
i
)
return
tf
.
concat
([
image_feature
,
prev
],
1
)
class
Attention
(
SequenceLayerBase
):
"""A layer which uses attention mechanism to select image features."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
Attention
,
self
).
__init__
(
*
args
,
**
kwargs
)
self
.
_zero_label
=
tf
.
zeros
(
[
self
.
_batch_size
,
self
.
_params
.
num_char_classes
])
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
del
prev
,
i
# The attention_decoder will fetch image features from the net, no need for
# extra inputs.
return
self
.
_zero_label
def
get_train_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_train_input for details."""
return
self
.
get_eval_input
(
prev
,
i
)
def
unroll_cell
(
self
,
decoder_inputs
,
initial_state
,
loop_function
,
cell
):
return
tf
.
contrib
.
legacy_seq2seq
.
attention_decoder
(
decoder_inputs
=
decoder_inputs
,
initial_state
=
initial_state
,
attention_states
=
self
.
_net
,
cell
=
cell
,
loop_function
=
self
.
get_input
)
class
AttentionWithAutoregression
(
Attention
):
"""A layer which uses both attention and auto regression."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
AttentionWithAutoregression
,
self
).
__init__
(
*
args
,
**
kwargs
)
def
get_train_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_train_input for details."""
if
i
==
0
:
return
self
.
_zero_label
else
:
# TODO(gorban): update to gradually introduce gt labels.
return
self
.
_labels_one_hot
[:,
i
-
1
,
:]
def
get_eval_input
(
self
,
prev
,
i
):
"""See SequenceLayerBase.get_eval_input for details."""
if
i
==
0
:
return
self
.
_zero_label
else
:
logit
=
self
.
char_logit
(
prev
,
char_index
=
i
-
1
)
return
self
.
char_one_hot
(
logit
)
def
get_layer_class
(
use_attention
,
use_autoregression
):
"""A convenience function to get a layer class based on requirements.
Args:
use_attention: if True a returned class will use attention.
use_autoregression: if True a returned class will use auto regression.
Returns:
One of available sequence layers (child classes for SequenceLayerBase).
"""
if
use_attention
and
use_autoregression
:
layer_class
=
AttentionWithAutoregression
elif
use_attention
and
not
use_autoregression
:
layer_class
=
Attention
elif
not
use_attention
and
not
use_autoregression
:
layer_class
=
NetSlice
elif
not
use_attention
and
use_autoregression
:
layer_class
=
NetSliceWithAutoregression
else
:
raise
AssertionError
(
'Unsupported sequence layer class'
)
logging
.
debug
(
'Use %s as a layer class'
,
layer_class
.
__name__
)
return
layer_class
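The SVD trick in orthogonal_initializer is easy to verify in plain numpy; the following sketch (not part of the commit) checks that for a wide matrix the chosen factor has orthonormal rows, matching the docstring's claim about semi-orthonormal results:

import numpy as np

flat_shape = (4, 6)  # fewer rows than columns
w = np.random.randn(*flat_shape)
u, _, v = np.linalg.svd(w, full_matrices=False)  # u: (4, 4), v: (4, 6)
w = u if u.shape == flat_shape else v            # picks v here
print(np.allclose(w.dot(w.T), np.eye(4)))        # True: rows are orthonormal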
attention_ocr/python/sequence_layers_test.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for sequence_layers."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
import
model
import
sequence_layers
def
fake_net
(
batch_size
,
num_features
,
feature_size
):
return
tf
.
convert_to_tensor
(
np
.
random
.
uniform
(
size
=
(
batch_size
,
num_features
,
feature_size
)),
dtype
=
tf
.
float32
)
def
fake_labels
(
batch_size
,
seq_length
,
num_char_classes
):
labels_np
=
tf
.
convert_to_tensor
(
np
.
random
.
randint
(
low
=
0
,
high
=
num_char_classes
,
size
=
(
batch_size
,
seq_length
)))
return
slim
.
one_hot_encoding
(
labels_np
,
num_classes
=
num_char_classes
)
def
create_layer
(
layer_class
,
batch_size
,
seq_length
,
num_char_classes
):
model_params
=
model
.
ModelParams
(
num_char_classes
=
num_char_classes
,
seq_length
=
seq_length
,
num_views
=
1
,
null_code
=
num_char_classes
)
net
=
fake_net
(
batch_size
=
batch_size
,
num_features
=
seq_length
*
5
,
feature_size
=
6
)
labels_one_hot
=
fake_labels
(
batch_size
,
seq_length
,
num_char_classes
)
layer_params
=
sequence_layers
.
SequenceLayerParams
(
num_lstm_units
=
10
,
weight_decay
=
0.00004
,
lstm_state_clip_value
=
10.0
)
return
layer_class
(
net
,
labels_one_hot
,
model_params
,
layer_params
)
class
SequenceLayersTest
(
tf
.
test
.
TestCase
):
def
test_net_slice_char_logits_with_correct_shape
(
self
):
batch_size
=
2
seq_length
=
4
num_char_classes
=
3
layer
=
create_layer
(
sequence_layers
.
NetSlice
,
batch_size
,
seq_length
,
num_char_classes
)
char_logits
=
layer
.
create_logits
()
self
.
assertEqual
(
tf
.
TensorShape
([
batch_size
,
seq_length
,
num_char_classes
]),
char_logits
.
get_shape
())
def
test_net_slice_with_autoregression_char_logits_with_correct_shape
(
self
):
batch_size
=
2
seq_length
=
4
num_char_classes
=
3
layer
=
create_layer
(
sequence_layers
.
NetSliceWithAutoregression
,
batch_size
,
seq_length
,
num_char_classes
)
char_logits
=
layer
.
create_logits
()
self
.
assertEqual
(
tf
.
TensorShape
([
batch_size
,
seq_length
,
num_char_classes
]),
char_logits
.
get_shape
())
def
test_attention_char_logits_with_correct_shape
(
self
):
batch_size
=
2
seq_length
=
4
num_char_classes
=
3
layer
=
create_layer
(
sequence_layers
.
Attention
,
batch_size
,
seq_length
,
num_char_classes
)
char_logits
=
layer
.
create_logits
()
self
.
assertEqual
(
tf
.
TensorShape
([
batch_size
,
seq_length
,
num_char_classes
]),
char_logits
.
get_shape
())
def
test_attention_with_autoregression_char_logits_with_correct_shape
(
self
):
batch_size
=
2
seq_length
=
4
num_char_classes
=
3
layer
=
create_layer
(
sequence_layers
.
AttentionWithAutoregression
,
batch_size
,
seq_length
,
num_char_classes
)
char_logits
=
layer
.
create_logits
()
self
.
assertEqual
(
tf
.
TensorShape
([
batch_size
,
seq_length
,
num_char_classes
]),
char_logits
.
get_shape
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
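The four shape tests above differ only in the class under test. A compact sketch (not part of the commit) of how the module's get_layer_class switch maps onto those same four classes:

import sequence_layers

cases = {
    (True, True): sequence_layers.AttentionWithAutoregression,
    (True, False): sequence_layers.Attention,
    (False, True): sequence_layers.NetSliceWithAutoregression,
    (False, False): sequence_layers.NetSlice,
}
for (use_attention, use_autoregression), expected in cases.items():
  layer_class = sequence_layers.get_layer_class(use_attention,
                                                use_autoregression)
  assert layer_class is expected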
attention_ocr/python/train.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to train the Attention OCR model.
A simple usage example:
python train.py
"""
import
collections
import
logging
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
from
tensorflow
import
app
from
tensorflow.python.platform
import
flags
from
tensorflow.contrib.tfprof
import
model_analyzer
import
data_provider
import
common_flags
FLAGS
=
flags
.
FLAGS
common_flags
.
define
()
# yapf: disable
flags
.
DEFINE_integer
(
'task'
,
0
,
'The Task ID. This value is used when training with '
'multiple workers to identify each worker.'
)
flags
.
DEFINE_integer
(
'ps_tasks'
,
0
,
'The number of parameter servers. If the value is 0, then'
' the parameters are handled locally by the worker.'
)
flags
.
DEFINE_integer
(
'save_summaries_secs'
,
60
,
'The frequency with which summaries are saved, in '
'seconds.'
)
flags
.
DEFINE_integer
(
'save_interval_secs'
,
600
,
'Frequency in seconds of saving the model.'
)
flags
.
DEFINE_integer
(
'max_number_of_steps'
,
int
(
1e10
),
'The maximum number of gradient steps.'
)
flags
.
DEFINE_string
(
'checkpoint_inception'
,
''
,
'Checkpoint to recover inception weights from.'
)
flags
.
DEFINE_float
(
'clip_gradient_norm'
,
2.0
,
'If greater than 0 then the gradients would be clipped by '
'it.'
)
flags
.
DEFINE_bool
(
'sync_replicas'
,
False
,
'If True will synchronize replicas during training.'
)
flags
.
DEFINE_integer
(
'replicas_to_aggregate'
,
1
,
'The number of gradients updates before updating params.'
)
flags
.
DEFINE_integer
(
'total_num_replicas'
,
1
,
'Total number of worker replicas.'
)
flags
.
DEFINE_integer
(
'startup_delay_steps'
,
15
,
'Number of training steps between replicas startup.'
)
flags
.
DEFINE_boolean
(
'reset_train_dir'
,
False
,
'If true will delete all files in the train_log_dir'
)
flags
.
DEFINE_boolean
(
'show_graph_stats'
,
False
,
'Output model size stats to stderr.'
)
# yapf: enable
TrainingHParams
=
collections
.
namedtuple
(
'TrainingHParams'
,
[
'learning_rate'
,
'optimizer'
,
'momentum'
,
'use_augment_input'
,
])
def
get_training_hparams
():
return
TrainingHParams
(
learning_rate
=
FLAGS
.
learning_rate
,
optimizer
=
FLAGS
.
optimizer
,
momentum
=
FLAGS
.
momentum
,
use_augment_input
=
FLAGS
.
use_augment_input
)
def
create_optimizer
(
hparams
):
"""Creates optimized based on the specified flags."""
if
hparams
.
optimizer
==
'momentum'
:
optimizer
=
tf
.
train
.
MomentumOptimizer
(
hparams
.
learning_rate
,
momentum
=
hparams
.
momentum
)
elif
hparams
.
optimizer
==
'adam'
:
optimizer
=
tf
.
train
.
AdamOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'adadelta'
:
optimizer
=
tf
.
train
.
AdadeltaOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'adagrad'
:
optimizer
=
tf
.
train
.
AdagradOptimizer
(
hparams
.
learning_rate
)
elif
hparams
.
optimizer
==
'rmsprop'
:
optimizer
=
tf
.
train
.
RMSPropOptimizer
(
hparams
.
learning_rate
,
momentum
=
hparams
.
momentum
)
return
optimizer
def
train
(
loss
,
init_fn
,
hparams
):
"""Wraps slim.learning.train to run a training loop.
Args:
loss: a loss tensor
init_fn: A callable to be executed after all other initialization is done.
hparams: a model hyper parameters
"""
optimizer
=
create_optimizer
(
hparams
)
if
FLAGS
.
sync_replicas
:
replica_id
=
tf
.
constant
(
FLAGS
.
task
,
tf
.
int32
,
shape
=
())
optimizer
=
tf
.
LegacySyncReplicasOptimizer
(
opt
=
optimizer
,
replicas_to_aggregate
=
FLAGS
.
replicas_to_aggregate
,
replica_id
=
replica_id
,
total_num_replicas
=
FLAGS
.
total_num_replicas
)
sync_optimizer
=
optimizer
startup_delay_steps
=
0
else
:
startup_delay_steps
=
0
sync_optimizer
=
None
train_op
=
slim
.
learning
.
create_train_op
(
loss
,
optimizer
,
summarize_gradients
=
True
,
clip_gradient_norm
=
FLAGS
.
clip_gradient_norm
)
slim
.
learning
.
train
(
train_op
=
train_op
,
logdir
=
FLAGS
.
train_log_dir
,
graph
=
loss
.
graph
,
master
=
FLAGS
.
master
,
is_chief
=
(
FLAGS
.
task
==
0
),
number_of_steps
=
FLAGS
.
max_number_of_steps
,
save_summaries_secs
=
FLAGS
.
save_summaries_secs
,
save_interval_secs
=
FLAGS
.
save_interval_secs
,
startup_delay_steps
=
startup_delay_steps
,
sync_optimizer
=
sync_optimizer
,
init_fn
=
init_fn
)
def
prepare_training_dir
():
if
not
tf
.
gfile
.
Exists
(
FLAGS
.
train_log_dir
):
logging
.
info
(
'Create a new training directory %s'
,
FLAGS
.
train_log_dir
)
tf
.
gfile
.
MakeDirs
(
FLAGS
.
train_log_dir
)
else
:
if
FLAGS
.
reset_train_dir
:
logging
.
info
(
'Reset the training directory %s'
,
FLAGS
.
train_log_dir
)
tf
.
gfile
.
DeleteRecursively
(
FLAGS
.
train_log_dir
)
tf
.
gfile
.
MakeDirs
(
FLAGS
.
train_log_dir
)
else
:
logging
.
info
(
'Use already existing training directory %s'
,
FLAGS
.
train_log_dir
)
def
calculate_graph_metrics
():
param_stats
=
model_analyzer
.
print_model_analysis
(
tf
.
get_default_graph
(),
tfprof_options
=
model_analyzer
.
TRAINABLE_VARS_PARAMS_STAT_OPTIONS
)
return
param_stats
.
total_parameters
def
main
(
_
):
prepare_training_dir
()
dataset
=
common_flags
.
create_dataset
(
split_name
=
FLAGS
.
split_name
)
model
=
common_flags
.
create_model
(
dataset
.
num_char_classes
,
dataset
.
max_sequence_length
,
dataset
.
num_of_views
,
dataset
.
null_code
)
hparams
=
get_training_hparams
()
# If ps_tasks is zero, the local device is used. When using multiple
# (non-local) replicas, the ReplicaDeviceSetter distributes the variables
# across the different devices.
device_setter
=
tf
.
train
.
replica_device_setter
(
FLAGS
.
ps_tasks
,
merge_devices
=
True
)
with
tf
.
device
(
device_setter
):
data
=
data_provider
.
get_data
(
dataset
,
FLAGS
.
batch_size
,
augment
=
hparams
.
use_augment_input
,
central_crop_size
=
common_flags
.
get_crop_size
())
endpoints
=
model
.
create_base
(
data
.
images
,
data
.
labels_one_hot
)
total_loss
=
model
.
create_loss
(
data
,
endpoints
)
model
.
create_summaries
(
data
,
endpoints
,
dataset
.
charset
,
is_training
=
True
)
init_fn
=
model
.
create_init_fn_to_restore
(
FLAGS
.
checkpoint
,
FLAGS
.
checkpoint_inception
)
if
FLAGS
.
show_graph_stats
:
logging
.
info
(
'Total number of weights in the graph: %s'
,
calculate_graph_metrics
())
train
(
total_loss
,
init_fn
,
hparams
)
if
__name__
==
'__main__'
:
app
.
run
()
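For reference, a minimal sketch (not part of the commit; the hyper-parameter values here are made up) of how create_optimizer consumes a TrainingHParams tuple under the TF 1.x API used above:

hparams = TrainingHParams(
    learning_rate=0.004,
    optimizer='momentum',
    momentum=0.9,
    use_augment_input=True)
optimizer = create_optimizer(hparams)  # a tf.train.MomentumOptimizer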
attention_ocr/python/utils.py (new file, 0 → 100644)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to support building models for StreetView text transcription."""
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
def
logits_to_log_prob
(
logits
):
"""Computes log probabilities using numerically stable trick.
This uses two numerical stability tricks:
1) softmax(x) = softmax(x - c) where c is a constant applied to all
arguments. If we set c = max(x) then the softmax is more numerically
stable.
2) log softmax(x) is not numerically stable, but we can stabilize it
by using the identity log softmax(x) = x - log sum exp(x)
Args:
logits: Tensor of arbitrary shape whose last dimension contains logits.
Returns:
A tensor of the same shape as the input, but with corresponding log
probabilities.
"""
with
tf
.
variable_scope
(
'log_probabilities'
):
reduction_indices
=
len
(
logits
.
shape
.
as_list
())
-
1
max_logits
=
tf
.
reduce_max
(
logits
,
reduction_indices
=
reduction_indices
,
keep_dims
=
True
)
safe_logits
=
tf
.
subtract
(
logits
,
max_logits
)
sum_exp
=
tf
.
reduce_sum
(
tf
.
exp
(
safe_logits
),
reduction_indices
=
reduction_indices
,
keep_dims
=
True
)
log_probs
=
tf
.
subtract
(
safe_logits
,
tf
.
log
(
sum_exp
))
return
log_probs
def
variables_to_restore
(
scope
=
None
,
strip_scope
=
False
):
"""Returns a list of variables to restore for the specified list of methods.
It is supposed that variable name starts with the method's scope (a prefix
returned by _method_scope function).
Args:
methods_names: a list of names of configurable methods.
strip_scope: if True will return variable names without method's scope.
If methods_names is None will return names unchanged.
model_scope: a scope for a whole model.
Returns:
a dictionary mapping variable names to variables for restore.
"""
if
scope
:
variable_map
=
{}
method_variables
=
slim
.
get_variables_to_restore
(
include
=
[
scope
])
for
var
in
method_variables
:
if
strip_scope
:
var_name
=
var
.
op
.
name
[
len
(
scope
)
+
1
:]
else
:
var_name
=
var
.
op
.
name
variable_map
[
var_name
]
=
var
return
variable_map
else
:
return
{
v
.
op
.
name
:
v
for
v
in
slim
.
get_variables_to_restore
()}
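The value of the max-subtraction trick in logits_to_log_prob is easy to see in numpy; a sketch (not part of the commit) where the naive formula overflows but the stabilized identity does not:

import numpy as np

def naive_log_softmax(x):
  return np.log(np.exp(x) / np.exp(x).sum(axis=-1, keepdims=True))

def stable_log_softmax(x):
  x = x - x.max(axis=-1, keepdims=True)  # trick 1: shift by the max
  return x - np.log(np.exp(x).sum(axis=-1, keepdims=True))  # trick 2

logits = np.array([[1000.0, 999.0, 998.0]])
print(naive_log_softmax(logits))   # [[nan nan nan]] -- exp overflows
print(stable_log_softmax(logits))  # [[-0.408 -1.408 -2.408]]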
cognitive_mapping_and_planning/.gitignore (new file, 0 → 100644)
deps
*.pyc
lib*.so
lib*.so*
cognitive_mapping_and_planning/README.md (new file, 0 → 100644)
# Cognitive Mapping and Planning for Visual Navigation

**Saurabh Gupta, James Davidson, Sergey Levine, Rahul Sukthankar, Jitendra Malik**

**Computer Vision and Pattern Recognition (CVPR) 2017.**

**[ArXiv](https://arxiv.org/abs/1702.03920), [Project Website](https://sites.google.com/corp/view/cognitive-mapping-and-planning/)**

### Citing
If you find this code base and models useful in your research, please consider
citing the following paper:
```
@inproceedings{gupta2017cognitive,
  title={Cognitive Mapping and Planning for Visual Navigation},
  author={Gupta, Saurabh and Davidson, James and Levine, Sergey and
    Sukthankar, Rahul and Malik, Jitendra},
  booktitle={CVPR},
  year={2017}
}
```

### Contents
1. [Requirements: software](#requirements-software)
2. [Requirements: data](#requirements-data)
3. [Test Pre-trained Models](#test-pre_trained-models)
4. [Train your Own Models](#train-your-own-models)

### Requirements: software
1. Python Virtual Env Setup: All code is implemented in Python but depends on a
small number of Python packages and a couple of C libraries. We recommend
using a virtual environment for installing these Python packages and the
Python bindings for these C libraries.
```Shell
VENV_DIR=venv
pip install virtualenv
virtualenv $VENV_DIR
source $VENV_DIR/bin/activate
# You may need to upgrade pip for installing opencv-python.
pip install --upgrade pip
# Install simple dependencies.
pip install -r requirements.txt
# Patch bugs in dependencies.
sh patches/apply_patches.sh
```
2. Install [Tensorflow](https://www.tensorflow.org/) inside this virtual
environment. Typically done with `pip install --upgrade tensorflow-gpu`.
3. Swiftshader: We use [Swiftshader](https://github.com/google/swiftshader.git),
a CPU based renderer, to render the meshes. It is possible to use other
renderers; to do so, replace `SwiftshaderRenderer` in
`render/swiftshader_renderer.py` with bindings to your renderer.
```Shell
mkdir -p deps
git clone --recursive https://github.com/google/swiftshader.git deps/swiftshader-src
cd deps/swiftshader-src && git checkout 91da6b00584afd7dcaed66da88e2b617429b3950
mkdir build && cd build && cmake .. && make -j 16 libEGL libGLESv2
cd ../../../
cp deps/swiftshader-src/build/libEGL* libEGL.so.1
cp deps/swiftshader-src/build/libGLESv2* libGLESv2.so.2
```
4. PyAssimp: We use [PyAssimp](https://github.com/assimp/assimp.git) to load
meshes. It is possible to use other libraries to load meshes; to do so,
replace `Shape` in `render/swiftshader_renderer.py` with bindings to your
library for loading meshes.
```Shell
mkdir -p deps
git clone https://github.com/assimp/assimp.git deps/assimp-src
cd deps/assimp-src
git checkout 2afeddd5cb63d14bc77b53740b38a54a97d94ee8
cmake CMakeLists.txt -G 'Unix Makefiles' && make -j 16
cd port/PyAssimp && python setup.py install
cd ../../../..
cp deps/assimp-src/lib/libassimp* .
```
5. graph-tool: We use the [graph-tool](https://git.skewed.de/count0/graph-tool)
library for graph processing.
```Shell
mkdir -p deps
# If the following git clone command fails, you can also download the source
# from https://downloads.skewed.de/graph-tool/graph-tool-2.2.44.tar.bz2
git clone https://git.skewed.de/count0/graph-tool deps/graph-tool-src
cd deps/graph-tool-src && git checkout 178add3a571feb6666f4f119027705d95d2951ab
bash autogen.sh
./configure --disable-cairo --disable-sparsehash --prefix=$HOME/.local
make -j 16
make install
cd ../../
```

### Requirements: data
1. Download the Stanford 3D Indoor Spaces Dataset (S3DIS Dataset) and ImageNet
pre-trained models for initializing different models. Follow the instructions
in `data/README.md`.

### Test Pre-trained Models
1. Download pre-trained models using
`scripts/scripts_download_pretrained_models.sh`.
2. Test models using `scripts/script_test_pretrained_models.sh`.

### Train Your Own Models
All models were trained asynchronously with 16 workers, each worker using data
from a single floor. The default hyper-parameters correspond to this setting.
See [distributed training with
Tensorflow](https://www.tensorflow.org/deploy/distributed) for setting up
distributed training. Training with a single worker is possible with the
current code base but will require some minor changes to allow each worker to
load all training environments.

### Contact
For questions or issues open an issue on the tensorflow/models
[issues tracker](https://github.com/tensorflow/models/issues). Please assign
issues to @s-gupta.

### Credits
This code was written by Saurabh Gupta (@s-gupta).
cognitive_mapping_and_planning/__init__.py (new empty file, 0 → 100644)
cognitive_mapping_and_planning/cfgs/__init__.py (new empty file, 0 → 100644)
cognitive_mapping_and_planning/cfgs/config_cmp.py (new file, 0 → 100644)
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os, sys
import numpy as np
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
import logging
import src.utils as utils
import cfgs.config_common as cc

import tensorflow as tf

rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169'
d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002'


def get_default_args():
  summary_args = utils.Foo(display_interval=1, test_iters=26,
                           arop_full_summary_iters=14)
  control_args = utils.Foo(train=False, test=False,
                           force_batchnorm_is_training_at_test=False,
                           reset_rng_seed=False, only_eval_when_done=False,
                           test_mode=None)
  return summary_args, control_args


def get_default_cmp_args():
  batch_norm_param = {'center': True, 'scale': True,
                      'activation_fn': tf.nn.relu}

  mapper_arch_args = utils.Foo(
      dim_reduce_neurons=64,
      fc_neurons=[1024, 1024],
      fc_out_size=8,
      fc_out_neurons=64,
      encoder='resnet_v2_50',
      deconv_neurons=[64, 32, 16, 8, 4, 2],
      deconv_strides=[2, 2, 2, 2, 2, 2],
      deconv_layers_per_block=2,
      deconv_kernel_size=4,
      fc_dropout=0.5,
      combine_type='wt_avg_logits',
      batch_norm_param=batch_norm_param)

  readout_maps_arch_args = utils.Foo(
      num_neurons=[],
      strides=[],
      kernel_size=None,
      layers_per_block=None)

  arch_args = utils.Foo(
      vin_val_neurons=8, vin_action_neurons=8, vin_ks=3, vin_share_wts=False,
      pred_neurons=[64, 64], pred_batch_norm_param=batch_norm_param,
      conv_on_value_map=0, fr_neurons=16, fr_ver='v2', fr_inside_neurons=64,
      fr_stride=1, crop_remove_each=30, value_crop_size=4,
      action_sample_type='sample', action_sample_combine_type='one_or_other',
      sample_gt_prob_type='inverse_sigmoid_decay', dagger_sample_bn_false=True,
      vin_num_iters=36, isd_k=750., use_agent_loc=False, multi_scale=True,
      readout_maps=False, rom_arch=readout_maps_arch_args)

  return arch_args, mapper_arch_args


def get_arch_vars(arch_str):
  if arch_str == '':
    vals = []
  else:
    vals = arch_str.split('_')

  ks = ['var1', 'var2', 'var3']
  ks = ks[:len(vals)]

  # Exp Ver.
  if len(vals) == 0: ks.append('var1'); vals.append('v0')
  # custom arch.
  if len(vals) == 1: ks.append('var2'); vals.append('')
  # map scale for projection baseline.
  if len(vals) == 2: ks.append('var3'); vals.append('fr2')

  assert(len(vals) == 3)

  vars = utils.Foo()
  for k, v in zip(ks, vals):
    setattr(vars, k, v)

  logging.error('arch_vars: %s', vars)
  return vars


def process_arch_str(args, arch_str):
  # This function modifies args.
  args.arch, args.mapper_arch = get_default_cmp_args()

  arch_vars = get_arch_vars(arch_str)

  args.navtask.task_params.outputs.ego_maps = True
  args.navtask.task_params.outputs.ego_goal_imgs = True
  args.navtask.task_params.outputs.egomotion = True
  args.navtask.task_params.toy_problem = False

  if arch_vars.var1 == 'lmap':
    args = process_arch_learned_map(args, arch_vars)
  elif arch_vars.var1 == 'pmap':
    args = process_arch_projected_map(args, arch_vars)
  else:
    logging.fatal('arch_vars.var1 should be lmap or pmap, but is %s',
                  arch_vars.var1)
    assert(False)

  return args


def process_arch_learned_map(args, arch_vars):
  # Multiscale vision based system.
  args.navtask.task_params.input_type = 'vision'
  args.navtask.task_params.outputs.images = True

  if args.navtask.camera_param.modalities[0] == 'rgb':
    args.solver.pretrained_path = rgb_resnet_v2_50_path
  elif args.navtask.camera_param.modalities[0] == 'depth':
    args.solver.pretrained_path = d_resnet_v2_50_path

  if arch_vars.var2 == 'Ssc':
    sc = 1. / args.navtask.task_params.step_size
    args.arch.vin_num_iters = 40
    args.navtask.task_params.map_scales = [sc]
    max_dist = args.navtask.task_params.max_dist * \
        args.navtask.task_params.num_goals
    args.navtask.task_params.map_crop_sizes = [2 * max_dist]

    args.arch.fr_stride = 1
    args.arch.vin_action_neurons = 8
    args.arch.vin_val_neurons = 3
    args.arch.fr_inside_neurons = 32

    args.mapper_arch.pad_map_with_zeros_each = [24]
    args.mapper_arch.deconv_neurons = [64, 32, 16]
    args.mapper_arch.deconv_strides = [1, 2, 1]

  elif (arch_vars.var2 == 'Msc' or arch_vars.var2 == 'MscROMms' or
        arch_vars.var2 == 'MscROMss' or arch_vars.var2 == 'MscNoVin'):
    # Code for multi-scale planner.
    args.arch.vin_num_iters = 8
    args.arch.crop_remove_each = 4
    args.arch.value_crop_size = 8

    sc = 1. / args.navtask.task_params.step_size
    max_dist = args.navtask.task_params.max_dist * \
        args.navtask.task_params.num_goals
    n_scales = np.log2(float(max_dist) / float(args.arch.vin_num_iters))
    n_scales = int(np.ceil(n_scales) + 1)

    args.navtask.task_params.map_scales = \
        list(sc * (0.5 ** (np.arange(n_scales))[::-1]))
    args.navtask.task_params.map_crop_sizes = [16 for x in range(n_scales)]

    args.arch.fr_stride = 1
    args.arch.vin_action_neurons = 8
    args.arch.vin_val_neurons = 3
    args.arch.fr_inside_neurons = 32

    args.mapper_arch.pad_map_with_zeros_each = [0 for _ in range(n_scales)]
    args.mapper_arch.deconv_neurons = [64 * n_scales, 32 * n_scales,
                                       16 * n_scales]
    args.mapper_arch.deconv_strides = [1, 2, 1]

    if arch_vars.var2 == 'MscNoVin':
      # No planning version.
      args.arch.fr_stride = [1, 2, 1, 2]
      args.arch.vin_action_neurons = None
      args.arch.vin_val_neurons = 16
      args.arch.fr_inside_neurons = 32

      args.arch.crop_remove_each = 0
      args.arch.value_crop_size = 4
      args.arch.vin_num_iters = 0

    elif arch_vars.var2 == 'MscROMms' or arch_vars.var2 == 'MscROMss':
      # Code with read outs, MscROMms flattens and reads out,
      # MscROMss does not flatten and produces output at multiple scales.
      args.navtask.task_params.outputs.readout_maps = True
      args.navtask.task_params.map_resize_method = 'antialiasing'
      args.arch.readout_maps = True

      if arch_vars.var2 == 'MscROMms':
        args.arch.rom_arch.num_neurons = [64, 1]
        args.arch.rom_arch.kernel_size = 4
        args.arch.rom_arch.strides = [2, 2]
        args.arch.rom_arch.layers_per_block = 2

        args.navtask.task_params.readout_maps_crop_sizes = [64]
        args.navtask.task_params.readout_maps_scales = [sc]

      elif arch_vars.var2 == 'MscROMss':
        args.arch.rom_arch.num_neurons = \
            [64, len(args.navtask.task_params.map_scales)]
        args.arch.rom_arch.kernel_size = 4
        args.arch.rom_arch.strides = [1, 1]
        args.arch.rom_arch.layers_per_block = 1

        args.navtask.task_params.readout_maps_crop_sizes = \
            args.navtask.task_params.map_crop_sizes
        args.navtask.task_params.readout_maps_scales = \
            args.navtask.task_params.map_scales

  else:
    logging.fatal('arch_vars.var2 not one of Msc, MscROMms, MscROMss, '
                  'MscNoVin.')
    assert(False)

  map_channels = args.mapper_arch.deconv_neurons[-1] / \
      (2 * len(args.navtask.task_params.map_scales))
  args.navtask.task_params.map_channels = map_channels

  return args


def process_arch_projected_map(args, arch_vars):
  # Single scale vision based system which does not use a mapper but instead
  # uses an analytically estimated map.
  ds = int(arch_vars.var3[2])
  args.navtask.task_params.input_type = 'analytical_counts'
  args.navtask.task_params.outputs.analytical_counts = True

  assert(args.navtask.task_params.modalities[0] == 'depth')
  args.navtask.camera_param.img_channels = None

  analytical_counts = utils.Foo(map_sizes=[512 / ds],
                                xy_resolution=[5. * ds],
                                z_bins=[[-10, 10, 150, 200]],
                                non_linearity=[arch_vars.var2])
  args.navtask.task_params.analytical_counts = analytical_counts

  sc = 1. / ds
  args.arch.vin_num_iters = 36
  args.navtask.task_params.map_scales = [sc]
  args.navtask.task_params.map_crop_sizes = [512 / ds]

  args.arch.fr_stride = [1, 2]
  args.arch.vin_action_neurons = 8
  args.arch.vin_val_neurons = 3
  args.arch.fr_inside_neurons = 32

  map_channels = len(analytical_counts.z_bins[0]) + 1
  args.navtask.task_params.map_channels = map_channels
  args.solver.freeze_conv = False

  return args


def get_args_for_config(config_name):
  args = utils.Foo()

  args.summary, args.control = get_default_args()

  exp_name, mode_str = config_name.split('+')
  arch_str, solver_str, navtask_str = exp_name.split('.')
  logging.error('config_name: %s', config_name)
  logging.error('arch_str: %s', arch_str)
  logging.error('navtask_str: %s', navtask_str)
  logging.error('solver_str: %s', solver_str)
  logging.error('mode_str: %s', mode_str)

  args.solver = cc.process_solver_str(solver_str)
  args.navtask = cc.process_navtask_str(navtask_str)

  args = process_arch_str(args, arch_str)
  args.arch.isd_k = args.solver.isd_k

  # Train, test, etc.
  mode, imset = mode_str.split('_')
  args = cc.adjust_args_for_mode(args, mode)
  args.navtask.building_names = args.navtask.dataset.get_split(imset)
  args.control.test_name = '{:s}_on_{:s}'.format(mode, imset)

  # Log the arguments
  logging.error('%s', args)
  return args
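The positional convention in get_arch_vars can be summarized in a few lines; a standalone sketch (not part of the commit) of the same parse, with the defaults 'v0', '' and 'fr2' filling in missing fields:

def parse_arch_str(arch_str):
  vals = arch_str.split('_') if arch_str else []
  defaults = ['v0', '', 'fr2']
  vals = vals + defaults[len(vals):]
  return dict(zip(['var1', 'var2', 'var3'], vals))

print(parse_arch_str('lmap_Msc'))  # {'var1': 'lmap', 'var2': 'Msc', 'var3': 'fr2'}
print(parse_arch_str(''))          # {'var1': 'v0', 'var2': '', 'var3': 'fr2'}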
cognitive_mapping_and_planning/cfgs/config_common.py (new file, 0 → 100644)
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import
os
import
numpy
as
np
import
logging
import
src.utils
as
utils
import
datasets.nav_env_config
as
nec
from
datasets
import
factory
def
adjust_args_for_mode
(
args
,
mode
):
if
mode
==
'train'
:
args
.
control
.
train
=
True
elif
mode
==
'val1'
:
# Same settings as for training, to make sure nothing wonky is happening
# there.
args
.
control
.
test
=
True
args
.
control
.
test_mode
=
'val'
args
.
navtask
.
task_params
.
batch_size
=
32
elif
mode
==
'val2'
:
# No data augmentation, not sampling but taking the argmax action, not
# sampling from the ground truth at all.
args
.
control
.
test
=
True
args
.
arch
.
action_sample_type
=
'argmax'
args
.
arch
.
sample_gt_prob_type
=
'zero'
args
.
navtask
.
task_params
.
data_augment
=
\
utils
.
Foo
(
lr_flip
=
0
,
delta_angle
=
0
,
delta_xy
=
0
,
relight
=
False
,
relight_fast
=
False
,
structured
=
False
)
args
.
control
.
test_mode
=
'val'
args
.
navtask
.
task_params
.
batch_size
=
32
elif
mode
==
'bench'
:
# Actually testing the agent in settings that are kept same between
# different runs.
args
.
navtask
.
task_params
.
batch_size
=
16
args
.
control
.
test
=
True
args
.
arch
.
action_sample_type
=
'argmax'
args
.
arch
.
sample_gt_prob_type
=
'zero'
args
.
navtask
.
task_params
.
data_augment
=
\
utils
.
Foo
(
lr_flip
=
0
,
delta_angle
=
0
,
delta_xy
=
0
,
relight
=
False
,
relight_fast
=
False
,
structured
=
False
)
args
.
summary
.
test_iters
=
250
args
.
control
.
only_eval_when_done
=
True
args
.
control
.
reset_rng_seed
=
True
args
.
control
.
test_mode
=
'test'
else
:
logging
.
fatal
(
'Unknown mode: %s.'
,
mode
)
assert
(
False
)
return
args
def
get_solver_vars
(
solver_str
):
if
solver_str
==
''
:
vals
=
[];
else
:
vals
=
solver_str
.
split
(
'_'
)
ks
=
[
'clip'
,
'dlw'
,
'long'
,
'typ'
,
'isdk'
,
'adam_eps'
,
'init_lr'
];
ks
=
ks
[:
len
(
vals
)]
# Gradient clipping or not.
if
len
(
vals
)
==
0
:
ks
.
append
(
'clip'
);
vals
.
append
(
'noclip'
);
# data loss weight.
if
len
(
vals
)
==
1
:
ks
.
append
(
'dlw'
);
vals
.
append
(
'dlw20'
)
# how long to train for.
if
len
(
vals
)
==
2
:
ks
.
append
(
'long'
);
vals
.
append
(
'nolong'
)
# Adam
if
len
(
vals
)
==
3
:
ks
.
append
(
'typ'
);
vals
.
append
(
'adam2'
)
# reg loss wt
if
len
(
vals
)
==
4
:
ks
.
append
(
'rlw'
);
vals
.
append
(
'rlw1'
)
# isd_k
if
len
(
vals
)
==
5
:
ks
.
append
(
'isdk'
);
vals
.
append
(
'isdk415'
)
# 415, inflexion at 2.5k.
# adam eps
if
len
(
vals
)
==
6
:
ks
.
append
(
'adam_eps'
);
vals
.
append
(
'aeps1en8'
)
# init lr
if
len
(
vals
)
==
7
:
ks
.
append
(
'init_lr'
);
vals
.
append
(
'lr1en3'
)
assert
(
len
(
vals
)
==
8
)
vars
=
utils
.
Foo
()
for
k
,
v
in
zip
(
ks
,
vals
):
setattr
(
vars
,
k
,
v
)
logging
.
error
(
'solver_vars: %s'
,
vars
)
return
vars
def process_solver_str(solver_str):
  solver = utils.Foo(
      seed=0, learning_rate_decay=None, clip_gradient_norm=None,
      max_steps=None, initial_learning_rate=None, momentum=None,
      steps_per_decay=None, logdir=None, sync=False, adjust_lr_sync=True,
      wt_decay=0.0001, data_loss_wt=None, reg_loss_wt=None, freeze_conv=True,
      num_workers=1, task=0, ps_tasks=0, master='local', typ=None,
      momentum2=None, adam_eps=None)

  # Clobber with overrides from solver str.
  solver_vars = get_solver_vars(solver_str)
  solver.data_loss_wt = float(solver_vars.dlw[3:].replace('x', '.'))
  solver.adam_eps = float(
      solver_vars.adam_eps[4:].replace('x', '.').replace('n', '-'))
  solver.initial_learning_rate = float(
      solver_vars.init_lr[2:].replace('x', '.').replace('n', '-'))
  solver.reg_loss_wt = float(solver_vars.rlw[3:].replace('x', '.'))
  solver.isd_k = float(solver_vars.isdk[4:].replace('x', '.'))

  long = solver_vars.long
  if long == 'long':
    solver.steps_per_decay = 40000
    solver.max_steps = 120000
  elif long == 'long2':
    solver.steps_per_decay = 80000
    solver.max_steps = 120000
  elif long == 'nolong' or long == 'nol':
    solver.steps_per_decay = 20000
    solver.max_steps = 60000
  else:
    logging.fatal('solver_vars.long should be long, long2, nolong or nol.')
    assert(False)

  clip = solver_vars.clip
  if clip == 'noclip' or clip == 'nocl':
    solver.clip_gradient_norm = 0
  elif clip[:4] == 'clip':
    solver.clip_gradient_norm = float(clip[4:].replace('x', '.'))
  else:
    logging.fatal('Unknown solver_vars.clip: %s', clip)
    assert(False)

  typ = solver_vars.typ
  if typ == 'adam':
    solver.typ = 'adam'
    solver.momentum = 0.9
    solver.momentum2 = 0.999
    solver.learning_rate_decay = 1.0
  elif typ == 'adam2':
    solver.typ = 'adam'
    solver.momentum = 0.9
    solver.momentum2 = 0.999
    solver.learning_rate_decay = 0.1
  elif typ == 'sgd':
    solver.typ = 'sgd'
    solver.momentum = 0.99
    solver.momentum2 = None
    solver.learning_rate_decay = 0.1
  else:
    logging.fatal('Unknown solver_vars.typ: %s', typ)
    assert(False)

  logging.error('solver: %s', solver)
  return solver
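
# Illustrative usage, not part of the original file: the encoded numbers use
# 'x' for the decimal point and 'n' for the minus sign, so 'aeps1en8' decodes
# to 1e-8 and 'lr1en3' to 1e-3.
def _example_process_solver_str():
  solver = process_solver_str('clip2')
  # solver.clip_gradient_norm == 2.0, solver.data_loss_wt == 20.0,
  # solver.reg_loss_wt == 1.0, solver.isd_k == 415.0,
  # solver.adam_eps == 1e-8, solver.initial_learning_rate == 1e-3,
  # solver.steps_per_decay == 20000, solver.max_steps == 60000 ('nolong')
  return solver
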
def get_navtask_vars(navtask_str):
  if navtask_str == '':
    vals = []
  else:
    vals = navtask_str.split('_')

  ks_all = ['dataset_name', 'modality', 'task', 'history', 'max_dist',
            'num_steps', 'step_size', 'n_ori', 'aux_views', 'data_aug']
  ks = ks_all[:len(vals)]

  # All data or not.
  if len(vals) == 0: ks.append('dataset_name'); vals.append('sbpd')
  # modality
  if len(vals) == 1: ks.append('modality'); vals.append('rgb')
  # semantic task?
  if len(vals) == 2: ks.append('task'); vals.append('r2r')
  # number of history frames.
  if len(vals) == 3: ks.append('history'); vals.append('h0')
  # max steps
  if len(vals) == 4: ks.append('max_dist'); vals.append('32')
  # num steps
  if len(vals) == 5: ks.append('num_steps'); vals.append('40')
  # step size
  if len(vals) == 6: ks.append('step_size'); vals.append('8')
  # n_ori
  if len(vals) == 7: ks.append('n_ori'); vals.append('4')
  # Auxiliary views.
  if len(vals) == 8: ks.append('aux_views'); vals.append('nv0')
  # Normal data augmentation as opposed to structured data augmentation (if
  # set to 'straug').
  if len(vals) == 9: ks.append('data_aug'); vals.append('straug')
  assert(len(vals) == 10)

  for i in range(len(ks)):
    assert(ks[i] == ks_all[i])

  vars = utils.Foo()
  for k, v in zip(ks, vals):
    setattr(vars, k, v)
  logging.error('navtask_vars: %s', vals)
  return vars
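
# Illustrative usage, not part of the original file: 'sbpd_d_r2r' pins the
# first three keys, and everything after 'task' falls back to the defaults.
def _example_get_navtask_vars():
  nv = get_navtask_vars('sbpd_d_r2r')
  # nv.dataset_name == 'sbpd', nv.modality == 'd', nv.task == 'r2r',
  # nv.history == 'h0', nv.max_dist == '32', nv.num_steps == '40',
  # nv.step_size == '8', nv.n_ori == '4', nv.aux_views == 'nv0',
  # nv.data_aug == 'straug'
  return nv
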
def process_navtask_str(navtask_str):
  navtask = nec.nav_env_base_config()

  # Clobber with overrides from strings.
  navtask_vars = get_navtask_vars(navtask_str)
  navtask.task_params.n_ori = int(navtask_vars.n_ori)
  navtask.task_params.max_dist = int(navtask_vars.max_dist)
  navtask.task_params.num_steps = int(navtask_vars.num_steps)
  navtask.task_params.step_size = int(navtask_vars.step_size)
  navtask.task_params.data_augment.delta_xy = int(navtask_vars.step_size)/2.
  n_aux_views_each = int(navtask_vars.aux_views[2])
  aux_delta_thetas = np.concatenate((np.arange(n_aux_views_each) + 1,
                                     -1 - np.arange(n_aux_views_each)))
  aux_delta_thetas = aux_delta_thetas * np.deg2rad(navtask.camera_param.fov)
  navtask.task_params.aux_delta_thetas = aux_delta_thetas

  if navtask_vars.data_aug == 'aug':
    navtask.task_params.data_augment.structured = False
  elif navtask_vars.data_aug == 'straug':
    navtask.task_params.data_augment.structured = True
  else:
    logging.fatal('Unknown navtask_vars.data_aug %s.', navtask_vars.data_aug)
    assert(False)

  navtask.task_params.num_history_frames = int(navtask_vars.history[1:])
  navtask.task_params.n_views = 1 + navtask.task_params.num_history_frames
  navtask.task_params.goal_channels = int(navtask_vars.n_ori)

  if navtask_vars.task == 'hard':
    navtask.task_params.type = 'rng_rejection_sampling_many'
    navtask.task_params.rejection_sampling_M = 2000
    navtask.task_params.min_dist = 10
  elif navtask_vars.task == 'r2r':
    navtask.task_params.type = 'room_to_room_many'
  elif navtask_vars.task == 'ST':
    # Semantic task at hand.
    navtask.task_params.goal_channels = \
        len(navtask.task_params.semantic_task.class_map_names)
    navtask.task_params.rel_goal_loc_dim = \
        len(navtask.task_params.semantic_task.class_map_names)
    navtask.task_params.type = 'to_nearest_obj_acc'
  else:
    logging.fatal('navtask_vars.task: should be hard, r2r or ST.')
    assert(False)

  if navtask_vars.modality == 'rgb':
    navtask.camera_param.modalities = ['rgb']
    navtask.camera_param.img_channels = 3
  elif navtask_vars.modality == 'd':
    navtask.camera_param.modalities = ['depth']
    navtask.camera_param.img_channels = 2

  navtask.task_params.img_height = navtask.camera_param.height
  navtask.task_params.img_width = navtask.camera_param.width
  navtask.task_params.modalities = navtask.camera_param.modalities
  navtask.task_params.img_channels = navtask.camera_param.img_channels
  navtask.task_params.img_fov = navtask.camera_param.fov

  navtask.dataset = factory.get_dataset(navtask_vars.dataset_name)
  return navtask
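
# Illustrative usage, not part of the original file: the same string run
# through the full pipeline yields a room-to-room task on depth images
# (assuming the defaults in nec.nav_env_base_config()).
def _example_process_navtask_str():
  navtask = process_navtask_str('sbpd_d_r2r')
  # navtask.task_params.type == 'room_to_room_many'
  # navtask.camera_param.modalities == ['depth']
  # navtask.task_params.max_dist == 32, navtask.task_params.num_steps == 40
  return navtask
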
cognitive_mapping_and_planning/cfgs/config_distill.py
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import pprint
import copy
import os
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
import logging
import src.utils as utils
import cfgs.config_common as cc

import tensorflow as tf

rgb_resnet_v2_50_path = 'cache/resnet_v2_50_inception_preprocessed/model.ckpt-5136169'

def get_default_args():
  robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120,
                    camera_elevation_degree=-15)

  camera_param = utils.Foo(width=225, height=225, z_near=0.05, z_far=20.0,
                           fov=60., modalities=['rgb', 'depth'])

  env = utils.Foo(padding=10, resolution=5, num_point_threshold=2,
                  valid_min=-10, valid_max=200, n_samples_per_face=200)

  data_augment = utils.Foo(lr_flip=0, delta_angle=1, delta_xy=4,
                           relight=False, relight_fast=False,
                           structured=False)

  task_params = utils.Foo(num_actions=4, step_size=4, num_steps=0,
                          batch_size=32, room_seed=0, base_class='Building',
                          task='mapping', n_ori=6, data_augment=data_augment,
                          output_transform_to_global_map=False,
                          output_canonical_map=False,
                          output_incremental_transform=False,
                          output_free_space=False,
                          move_type='shortest_path', toy_problem=0)

  buildinger_args = utils.Foo(
      building_names=['area1_gates_wingA_floor1_westpart'],
      env_class=None, robot=robot, task_params=task_params, env=env,
      camera_param=camera_param)

  solver_args = utils.Foo(seed=0, learning_rate_decay=0.1,
                          clip_gradient_norm=0, max_steps=120000,
                          initial_learning_rate=0.001, momentum=0.99,
                          steps_per_decay=40000, logdir=None, sync=False,
                          adjust_lr_sync=True, wt_decay=0.0001,
                          data_loss_wt=1.0, reg_loss_wt=1.0, num_workers=1,
                          task=0, ps_tasks=0, master='local')

  summary_args = utils.Foo(display_interval=1, test_iters=100)

  control_args = utils.Foo(train=False, test=False,
                           force_batchnorm_is_training_at_test=False)

  arch_args = utils.Foo(rgb_encoder='resnet_v2_50', d_encoder='resnet_v2_50')

  return utils.Foo(solver=solver_args, summary=summary_args,
                   control=control_args, arch=arch_args,
                   buildinger=buildinger_args)

def get_vars(config_name):
  vars = config_name.split('_')
  if len(vars) == 1:
    # All data or not.
    vars.append('noall')
  if len(vars) == 2:
    # n_ori
    vars.append('4')
  logging.error('vars: %s', vars)
  return vars

def get_args_for_config(config_name):
  args = get_default_args()
  config_name, mode = config_name.split('+')
  vars = get_vars(config_name)
  logging.info('config_name: %s, mode: %s', config_name, mode)

  args.buildinger.task_params.n_ori = int(vars[2])
  args.solver.freeze_conv = True
  args.solver.pretrained_path = rgb_resnet_v2_50_path
  args.buildinger.task_params.img_channels = 5
  args.solver.data_loss_wt = 0.00001

  if vars[0] == 'v0':
    pass
  else:
    logging.error('config_name: %s undefined', config_name)

  args.buildinger.task_params.height = args.buildinger.camera_param.height
  args.buildinger.task_params.width = args.buildinger.camera_param.width
  args.buildinger.task_params.modalities = \
      args.buildinger.camera_param.modalities

  if vars[1] == 'all':
    args = cc.get_args_for_mode_building_all(args, mode)
  elif vars[1] == 'noall':
    args = cc.get_args_for_mode_building(args, mode)

  # Log the arguments
  logging.error('%s', args)
  return args
cognitive_mapping_and_planning/cfgs/config_vision_baseline.py
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import pprint
import os
import numpy as np
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
import logging
import src.utils as utils
import cfgs.config_common as cc
import datasets.nav_env_config as nec

import tensorflow as tf

FLAGS = flags.FLAGS

get_solver_vars = cc.get_solver_vars
get_navtask_vars = cc.get_navtask_vars

rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169'
d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002'

def get_default_args():
  summary_args = utils.Foo(display_interval=1, test_iters=26,
                           arop_full_summary_iters=14)

  control_args = utils.Foo(train=False, test=False,
                           force_batchnorm_is_training_at_test=False,
                           reset_rng_seed=False, only_eval_when_done=False,
                           test_mode=None)
  return summary_args, control_args

def get_default_baseline_args():
  batch_norm_param = {'center': True, 'scale': True,
                      'activation_fn': tf.nn.relu}
  arch_args = utils.Foo(
      pred_neurons=[], goal_embed_neurons=[], img_embed_neurons=[],
      batch_norm_param=batch_norm_param, dim_reduce_neurons=64,
      combine_type='', encoder='resnet_v2_50', action_sample_type='sample',
      action_sample_combine_type='one_or_other',
      sample_gt_prob_type='inverse_sigmoid_decay',
      dagger_sample_bn_false=True, isd_k=750., use_visit_count=False,
      lstm_output=False, lstm_ego=False, lstm_img=False, fc_dropout=0.0,
      embed_goal_for_state=False, lstm_output_init_state_from_goal=False)
  return arch_args

def get_arch_vars(arch_str):
  if arch_str == '':
    vals = []
  else:
    vals = arch_str.split('_')

  ks = ['ver', 'lstm_dim', 'dropout']

  # Exp Ver
  if len(vals) == 0: vals.append('v0')
  # LSTM dimensions
  if len(vals) == 1: vals.append('lstm2048')
  # Dropout
  if len(vals) == 2: vals.append('noDO')
  assert(len(vals) == 3)

  vars = utils.Foo()
  for k, v in zip(ks, vals):
    setattr(vars, k, v)
  logging.error('arch_vars: %s', vars)
  return vars

def process_arch_str(args, arch_str):
  # This function modifies args.
  args.arch = get_default_baseline_args()
  arch_vars = get_arch_vars(arch_str)

  args.navtask.task_params.outputs.rel_goal_loc = True
  args.navtask.task_params.input_type = 'vision'
  args.navtask.task_params.outputs.images = True

  if args.navtask.camera_param.modalities[0] == 'rgb':
    args.solver.pretrained_path = rgb_resnet_v2_50_path
  elif args.navtask.camera_param.modalities[0] == 'depth':
    args.solver.pretrained_path = d_resnet_v2_50_path
  else:
    logging.fatal('Neither of rgb or d')

  if arch_vars.dropout == 'DO':
    args.arch.fc_dropout = 0.5

  args.tfcode = 'B'

  exp_ver = arch_vars.ver
  if exp_ver == 'v0':
    # Multiplicative interaction between goal loc and image features.
    args.arch.combine_type = 'multiply'
    args.arch.pred_neurons = [256, 256]
    args.arch.goal_embed_neurons = [64, 8]
    args.arch.img_embed_neurons = [1024, 512, 256*8]

  elif exp_ver == 'v1':
    # Additive interaction between goal and image features.
    args.arch.combine_type = 'add'
    args.arch.pred_neurons = [256, 256]
    args.arch.goal_embed_neurons = [64, 256]
    args.arch.img_embed_neurons = [1024, 512, 256]

  elif exp_ver == 'v2':
    # LSTM at the output on top of multiple interactions.
    args.arch.combine_type = 'multiply'
    args.arch.goal_embed_neurons = [64, 8]
    args.arch.img_embed_neurons = [1024, 512, 256*8]
    args.arch.lstm_output = True
    args.arch.lstm_output_dim = int(arch_vars.lstm_dim[4:])
    args.arch.pred_neurons = [256]  # The other is inside the LSTM.

  elif exp_ver == 'v0blind':
    # LSTM only on the goal location.
    args.arch.combine_type = 'goalonly'
    args.arch.goal_embed_neurons = [64, 256]
    args.arch.img_embed_neurons = [2]  # I don't know what it will do otherwise.
    args.arch.lstm_output = True
    args.arch.lstm_output_dim = 256
    args.arch.pred_neurons = [256]  # The other is inside the LSTM.

  else:
    logging.fatal('exp_ver: %s undefined', exp_ver)
    assert(False)

  # Log the arguments
  logging.error('%s', args)
  return args

def get_args_for_config(config_name):
  args = utils.Foo()
  args.summary, args.control = get_default_args()

  exp_name, mode_str = config_name.split('+')
  arch_str, solver_str, navtask_str = exp_name.split('.')
  logging.error('config_name: %s', config_name)
  logging.error('arch_str: %s', arch_str)
  logging.error('navtask_str: %s', navtask_str)
  logging.error('solver_str: %s', solver_str)
  logging.error('mode_str: %s', mode_str)

  args.solver = cc.process_solver_str(solver_str)
  args.navtask = cc.process_navtask_str(navtask_str)
  args = process_arch_str(args, arch_str)
  args.arch.isd_k = args.solver.isd_k

  # Train, test, etc.
  mode, imset = mode_str.split('_')
  args = cc.adjust_args_for_mode(args, mode)
  args.navtask.building_names = args.navtask.dataset.get_split(imset)
  args.control.test_name = '{:s}_on_{:s}'.format(mode, imset)

  # Log the arguments
  logging.error('%s', args)
  return args
cognitive_mapping_and_planning/data/.gitignore
stanford_building_parser_dataset_raw
stanford_building_parser_dataset
init_models
cognitive_mapping_and_planning/data/README.md
This directory contains the data needed for training and benchmarking various
navigation models.

1.  Download the data from the
    [dataset website](http://buildingparser.stanford.edu/dataset.html).
    1.  [Raw meshes](https://goo.gl/forms/2YSPaO2UKmn5Td5m2). We need the
        meshes which are in the noXYZ folder. Download the tar files and
        place them in the `stanford_building_parser_dataset_raw` folder. You
        need to download `area_1_noXYZ.tar`, `area_3_noXYZ.tar`,
        `area_5a_noXYZ.tar`, `area_5b_noXYZ.tar`, `area_6_noXYZ.tar` for
        training and `area_4_noXYZ.tar` for evaluation.
    2.  [Annotations](https://goo.gl/forms/4SoGp4KtH1jfRqEj2) for setting up
        tasks. We will need the file called `Stanford3dDataset_v1.2.zip`.
        Place the file in the directory
        `stanford_building_parser_dataset_raw`.

2.  Preprocess the data.
    1.  Extract meshes using `scripts/script_preprocess_meshes_S3DIS.sh`.
        After this, `ls data/stanford_building_parser_dataset/mesh` should
        show 6 folders `area1`, `area3`, `area4`, `area5a`, `area5b`,
        `area6`, with textures and obj files within each directory.
    2.  Extract room information and semantics from the zip file using
        `scripts/script_preprocess_annoations_S3DIS.sh`. After this there
        should be `room-dimension` and `class-maps` folders in
        `data/stanford_building_parser_dataset`. (If you find this script
        crashing because of an exception in np.loadtxt while processing
        `Area_5/office_19/Annotations/ceiling_1.txt`, there is a special
        character on line 323474 that should be removed manually.)

3.  Download ImageNet pre-trained models. We used ResNet-v2-50 for
    representing images. For RGB images this is pre-trained on ImageNet. For
    depth images we [distill](https://arxiv.org/abs/1507.00448) the RGB model
    to depth images using paired RGB-D images. Both these models are
    available through `scripts/script_download_init_models.sh`.
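
As a quick sanity check after preprocessing (a minimal sketch, not part of
the setup scripts; the paths simply follow the steps above):

```python
import os

def check_sbpd_layout(root='data/stanford_building_parser_dataset'):
  """Checks that meshes and annotations landed where the steps above put them."""
  for area in ['area1', 'area3', 'area4', 'area5a', 'area5b', 'area6']:
    assert os.path.isdir(os.path.join(root, 'mesh', area)), area
  for sub in ['room-dimension', 'class-maps']:
    assert os.path.isdir(os.path.join(root, sub)), sub
  print('Layout looks good.')

check_sbpd_layout()
```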
cognitive_mapping_and_planning/datasets/__init__.py
cognitive_mapping_and_planning/datasets/factory.py
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Wrapper for selecting the navigation environment that we want to train
and test on.
"""
import numpy as np
import os, glob
import platform
import logging
from tensorflow.python.platform import app
from tensorflow.python.platform import flags

import render.swiftshader_renderer as renderer
import src.file_utils as fu
import src.utils as utils

def get_dataset(dataset_name):
  if dataset_name == 'sbpd':
    dataset = StanfordBuildingParserDataset(dataset_name)
  else:
    logging.fatal('Not one of sbpd')
  return dataset

class Loader():
  def get_data_dir(self):
    pass

  def get_meta_data(self, file_name, data_dir=None):
    if data_dir is None:
      data_dir = self.get_data_dir()
    full_file_name = os.path.join(data_dir, 'meta', file_name)
    assert(fu.exists(full_file_name)), \
        '{:s} does not exist'.format(full_file_name)
    ext = os.path.splitext(full_file_name)[1]
    if ext == '.txt':
      ls = []
      with fu.fopen(full_file_name, 'r') as f:
        for l in f:
          ls.append(l.rstrip())
    elif ext == '.pkl':
      ls = utils.load_variables(full_file_name)
    return ls

  def load_building(self, name, data_dir=None):
    if data_dir is None:
      data_dir = self.get_data_dir()
    out = {}
    out['name'] = name
    out['data_dir'] = data_dir
    out['room_dimension_file'] = os.path.join(data_dir, 'room-dimension',
                                              name+'.pkl')
    out['class_map_folder'] = os.path.join(data_dir, 'class-maps')
    return out

  def load_building_meshes(self, building):
    dir_name = os.path.join(building['data_dir'], 'mesh', building['name'])
    mesh_file_name = glob.glob1(dir_name, '*.obj')[0]
    mesh_file_name_full = os.path.join(dir_name, mesh_file_name)
    logging.error('Loading building from obj file: %s', mesh_file_name_full)
    shape = renderer.Shape(mesh_file_name_full, load_materials=True,
                           name_prefix=building['name']+'_')
    return [shape]

class StanfordBuildingParserDataset(Loader):
  def __init__(self, ver):
    self.ver = ver
    self.data_dir = None

  def get_data_dir(self):
    if self.data_dir is None:
      self.data_dir = 'data/stanford_building_parser_dataset/'
    return self.data_dir

  def get_benchmark_sets(self):
    return self._get_benchmark_sets()

  def get_split(self, split_name):
    if self.ver == 'sbpd':
      return self._get_split(split_name)
    else:
      logging.fatal('Unknown version.')

  def _get_benchmark_sets(self):
    sets = ['train1', 'val', 'test']
    return sets

  def _get_split(self, split_name):
    train = ['area1', 'area5a', 'area5b', 'area6']
    train1 = ['area1']
    val = ['area3']
    test = ['area4']
    sets = {}
    sets['train'] = train
    sets['train1'] = train1
    sets['val'] = val
    sets['test'] = test
    sets['all'] = sorted(list(set(train + val + test)))
    return sets[split_name]
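
# Illustrative usage, not part of the original file: the splits mirror the
# download instructions in data/README.md (areas 1, 5a, 5b and 6 for
# training, area 3 for validation and area 4 for testing).
def _example_splits():
  dataset = get_dataset('sbpd')
  train = dataset.get_split('train')  # ['area1', 'area5a', 'area5b', 'area6']
  val = dataset.get_split('val')      # ['area3']
  test = dataset.get_split('test')    # ['area4']
  return train, val, test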
cognitive_mapping_and_planning/datasets/nav_env.py
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Navigation Environment. Includes the following classes along with some
helper functions.
  Building: Loads buildings, computes traversibility, exposes functionality
    for rendering images.
  GridWorld: Base class which implements functionality for moving an agent
    on a grid world.
  NavigationEnv: Base class which generates navigation problems on a grid
    world.
  VisualNavigationEnv: Builds upon NavigationEnv and Building to provide
    interface that is used externally to train the agent.
  MeshMapper: Class used for distilling the model, testing the mapper.
  BuildingMultiplexer: Wrapper class that instantiates a VisualNavigationEnv
    for each building and multiplexes between them as needed.
"""
import numpy as np
import os
import re
import matplotlib.pyplot as plt

import graph_tool as gt
import graph_tool.topology

from tensorflow.python.platform import gfile
import logging
import src.file_utils as fu
import src.utils as utils
import src.graph_utils as gu
import src.map_utils as mu
import src.depth_utils as du
import render.swiftshader_renderer as sru
from render.swiftshader_renderer import SwiftshaderRenderer
import cv2

label_nodes_with_class = gu.label_nodes_with_class
label_nodes_with_class_geodesic = gu.label_nodes_with_class_geodesic
get_distance_node_list = gu.get_distance_node_list
convert_to_graph_tool = gu.convert_to_graph_tool
generate_graph = gu.generate_graph
get_hardness_distribution = gu.get_hardness_distribution
rng_next_goal_rejection_sampling = gu.rng_next_goal_rejection_sampling
rng_next_goal = gu.rng_next_goal
rng_room_to_room = gu.rng_room_to_room
rng_target_dist_field = gu.rng_target_dist_field

compute_traversibility = mu.compute_traversibility
make_map = mu.make_map
resize_maps = mu.resize_maps
pick_largest_cc = mu.pick_largest_cc
get_graph_origin_loc = mu.get_graph_origin_loc
generate_egocentric_maps = mu.generate_egocentric_maps
generate_goal_images = mu.generate_goal_images
get_map_to_predict = mu.get_map_to_predict

bin_points = du.bin_points
make_geocentric = du.make_geocentric
get_point_cloud_from_z = du.get_point_cloud_from_z
get_camera_matrix = du.get_camera_matrix

def _get_semantic_maps(folder_name, building_name, map, flip):
  # Load file from the cache.
  file_name = '{:s}_{:d}_{:d}_{:d}_{:d}_{:d}_{:d}.pkl'
  file_name = file_name.format(building_name, map.size[0], map.size[1],
                               map.origin[0], map.origin[1], map.resolution,
                               flip)
  file_name = os.path.join(folder_name, file_name)
  logging.info('Loading semantic maps from %s.', file_name)

  if fu.exists(file_name):
    a = utils.load_variables(file_name)
    maps = a['maps']  # HxWx#C
    cats = a['cats']
  else:
    logging.error('file_name: %s not found.', file_name)
    maps = None
    cats = None
  return maps, cats

def _select_classes(all_maps, all_cats, cats_to_use):
  inds = []
  for c in cats_to_use:
    ind = all_cats.index(c)
    inds.append(ind)
  out_maps = all_maps[:, :, inds]
  return out_maps

def _get_room_dimensions(file_name, resolution, origin, flip=False):
  if fu.exists(file_name):
    a = utils.load_variables(file_name)['room_dimension']
    names = a.keys()
    dims = np.concatenate(a.values(), axis=0).reshape((-1, 6))
    ind = np.argsort(names)
    dims = dims[ind, :]
    names = [names[x] for x in ind]
    if flip:
      dims_new = dims * 1
      dims_new[:, 1] = -dims[:, 4]
      dims_new[:, 4] = -dims[:, 1]
      dims = dims_new * 1

    dims = dims * 100.
    dims[:, 0] = dims[:, 0] - origin[0]
    dims[:, 1] = dims[:, 1] - origin[1]
    dims[:, 3] = dims[:, 3] - origin[0]
    dims[:, 4] = dims[:, 4] - origin[1]
    dims = dims / resolution
    out = {'names': names, 'dims': dims}
  else:
    out = None
  return out

def _filter_rooms(room_dims, room_regex):
  pattern = re.compile(room_regex)
  ind = []
  for i, name in enumerate(room_dims['names']):
    if pattern.match(name):
      ind.append(i)
  new_room_dims = {}
  new_room_dims['names'] = [room_dims['names'][i] for i in ind]
  new_room_dims['dims'] = room_dims['dims'][ind, :] * 1
  return new_room_dims

def _label_nodes_with_room_id(xyt, room_dims):
  # Label each node with the ID of the room it falls in.
  node_room_id = -1 * np.ones((xyt.shape[0], 1))
  dims = room_dims['dims']
  for x, name in enumerate(room_dims['names']):
    all_ = np.concatenate((xyt[:, [0]] >= dims[x, 0],
                           xyt[:, [0]] <= dims[x, 3],
                           xyt[:, [1]] >= dims[x, 1],
                           xyt[:, [1]] <= dims[x, 4]), axis=1)
    node_room_id[np.all(all_, axis=1), 0] = x
  return node_room_id
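
# Illustrative usage, not part of the original file: dims rows are
# [xmin, ymin, zmin, xmax, ymax, zmax]; nodes outside every room keep id -1.
def _example_label_nodes_with_room_id():
  xyt = np.array([[1., 1., 0.], [9., 9., 0.]])
  room_dims = {'names': ['office_1'],
               'dims': np.array([[0., 0., 0., 5., 5., 3.]])}
  return _label_nodes_with_room_id(xyt, room_dims)  # [[0.], [-1.]]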

def get_path_ids(start_node_id, end_node_id, pred_map):
  id = start_node_id
  path = [id]
  while id != end_node_id:
    id = pred_map[id]
    path.append(id)
  return path
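
# Illustrative usage, not part of the original file: pred_map maps a node to
# the next node on the way to end_node_id, so following it from the start
# node traces out the full path.
def _example_get_path_ids():
  pred_map = {2: 1, 1: 0, 0: 0}
  return get_path_ids(2, 0, pred_map)  # [2, 1, 0]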

def image_pre(images, modalities):
  # Assumes images are ...xHxWxC.
  # We always assume images are RGB followed by Depth.
  if 'depth' in modalities:
    d = images[..., -1][..., np.newaxis] * 1.
    d[d < 0.01] = np.NaN
    isnan = np.isnan(d)
    d = 100. / d
    d[isnan] = 0.
    images = np.concatenate((images[..., :-1], d, isnan),
                            axis=images.ndim - 1)
  if 'rgb' in modalities:
    images[..., :3] = images[..., :3] * 1. - 128
  return images
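
# Illustrative usage, not part of the original file: depth becomes inverse
# depth (100/d) with invalid pixels (< 0.01) zeroed, a validity-mask channel
# is appended, and RGB is mean-shifted by -128.
def _example_image_pre():
  img = np.zeros((2, 2, 4))
  img[..., :3] = 128.  # mid-gray RGB
  img[..., 3] = 50.    # constant depth
  out = image_pre(img * 1., ['rgb', 'depth'])
  # out[..., :3] == 0., out[..., 3] == 2. (100/50), out[..., 4] == 0.
  return out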

def _get_relative_goal_loc(goal_loc, loc, theta):
  r = np.sqrt(np.sum(np.square(goal_loc - loc), axis=1))
  t = np.arctan2(goal_loc[:, 1] - loc[:, 1], goal_loc[:, 0] - loc[:, 0])
  t = t - theta[:, 0] + np.pi / 2
  return np.expand_dims(r, axis=1), np.expand_dims(t, axis=1)
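
# Illustrative usage, not part of the original file: note the +pi/2 offset in
# the convention above, so a goal along theta = 0 comes out at t = pi/2.
def _example_relative_goal_loc():
  goal_loc = np.array([[1., 0.]])
  loc = np.array([[0., 0.]])
  theta = np.array([[0.]])
  r, t = _get_relative_goal_loc(goal_loc, loc, theta)
  # r == [[1.]], t == [[np.pi / 2]]
  return r, t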

def _gen_perturbs(rng, batch_size, num_steps, lr_flip, delta_angle, delta_xy,
                  structured):
  perturbs = []
  for i in range(batch_size):
    # Doing things one by one for each episode in this batch. This way this
    # remains replicatable even when we change the batch size.
    p = np.zeros((num_steps + 1, 4))
    if lr_flip:
      # Flip the whole trajectory.
      p[:, 3] = rng.rand(1) - 0.5
    if delta_angle > 0:
      if structured:
        p[:, 2] = (rng.rand(1) - 0.5) * delta_angle
      else:
        p[:, 2] = (rng.rand(p.shape[0]) - 0.5) * delta_angle
    if delta_xy > 0:
      if structured:
        p[:, :2] = (rng.rand(1, 2) - 0.5) * delta_xy
      else:
        p[:, :2] = (rng.rand(p.shape[0], 2) - 0.5) * delta_xy
    perturbs.append(p)
  return perturbs
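
# Illustrative usage, not part of the original file: with structured=True a
# single perturbation is drawn per trajectory (rows of each (num_steps+1) x 4
# array are identical); with structured=False each step gets its own draw.
def _example_gen_perturbs():
  rng = np.random.RandomState(0)
  perturbs = _gen_perturbs(rng, batch_size=2, num_steps=3, lr_flip=1,
                           delta_angle=1, delta_xy=4, structured=True)
  # len(perturbs) == 2; each element has shape (4, 4).
  return perturbs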

def get_multiplexer_class(args, task_number):
  assert(args.task_params.base_class == 'Building')
  logging.info('Returning BuildingMultiplexer')
  R = BuildingMultiplexer(args, task_number)
  return R

class GridWorld():
  def __init__(self):
    """Class members that will be assigned by any class that actually uses
    this class."""
    self.restrict_to_largest_cc = None
    self.robot = None
    self.env = None
    self.category_list = None
    self.traversible = None

  def get_loc_axis(self, node, delta_theta, perturb=None):
    """Based on the node orientation returns X, and Y axis. Used to sample
    the map in egocentric coordinate frame.
    """
    if type(node) == tuple:
      node = np.array([node])
    if perturb is None:
      perturb = np.zeros((node.shape[0], 4))
    xyt = self.to_actual_xyt_vec(node)
    x = xyt[:, [0]] + perturb[:, [0]]
    y = xyt[:, [1]] + perturb[:, [1]]
    t = xyt[:, [2]] + perturb[:, [2]]
    theta = t * delta_theta
    loc = np.concatenate((x, y), axis=1)
    x_axis = np.concatenate((np.cos(theta), np.sin(theta)), axis=1)
    y_axis = np.concatenate((np.cos(theta + np.pi / 2.),
                             np.sin(theta + np.pi / 2.)), axis=1)
    # Flip the sampled map where need be.
    y_axis[np.where(perturb[:, 3] > 0)[0], :] *= -1.
    return loc, x_axis, y_axis, theta

  def to_actual_xyt(self, pqr):
    """Converts from node to location on the map."""
    (p, q, r) = pqr
    if self.task.n_ori == 6:
      out = (p - q * 0.5 + self.task.origin_loc[0],
             q * np.sqrt(3.) / 2. + self.task.origin_loc[1], r)
    elif self.task.n_ori == 4:
      out = (p + self.task.origin_loc[0], q + self.task.origin_loc[1], r)
    return out

  def to_actual_xyt_vec(self, pqr):
    """Converts from node array to location array on the map."""
    p = pqr[:, 0][:, np.newaxis]
    q = pqr[:, 1][:, np.newaxis]
    r = pqr[:, 2][:, np.newaxis]
    if self.task.n_ori == 6:
      out = np.concatenate(
          (p - q * 0.5 + self.task.origin_loc[0],
           q * np.sqrt(3.) / 2. + self.task.origin_loc[1], r), axis=1)
    elif self.task.n_ori == 4:
      out = np.concatenate(
          (p + self.task.origin_loc[0], q + self.task.origin_loc[1], r),
          axis=1)
    return out

  def raw_valid_fn_vec(self, xyt):
    """Returns if the given set of locations is valid or not."""
    height = self.traversible.shape[0]
    width = self.traversible.shape[1]
    x = np.round(xyt[:, [0]]).astype(np.int32)
    y = np.round(xyt[:, [1]]).astype(np.int32)
    is_inside = np.all(np.concatenate((x >= 0, y >= 0,
                                       x < width, y < height), axis=1),
                       axis=1)
    x = np.minimum(np.maximum(x, 0), width - 1)
    y = np.minimum(np.maximum(y, 0), height - 1)
    ind = np.ravel_multi_index((y, x), self.traversible.shape)
    is_traversible = self.traversible.ravel()[ind]
    is_valid = np.all(np.concatenate((is_inside[:, np.newaxis],
                                      is_traversible), axis=1), axis=1)
    return is_valid

  def valid_fn_vec(self, pqr):
    """Returns if the given set of nodes is valid or not."""
    xyt = self.to_actual_xyt_vec(np.array(pqr))
    height = self.traversible.shape[0]
    width = self.traversible.shape[1]
    x = np.round(xyt[:, [0]]).astype(np.int32)
    y = np.round(xyt[:, [1]]).astype(np.int32)
    is_inside = np.all(np.concatenate((x >= 0, y >= 0,
                                       x < width, y < height), axis=1),
                       axis=1)
    x = np.minimum(np.maximum(x, 0), width - 1)
    y = np.minimum(np.maximum(y, 0), height - 1)
    ind = np.ravel_multi_index((y, x), self.traversible.shape)
    is_traversible = self.traversible.ravel()[ind]
    is_valid = np.all(np.concatenate((is_inside[:, np.newaxis],
                                      is_traversible), axis=1), axis=1)
    return is_valid

  def get_feasible_actions(self, node_ids):
    """Returns the feasible set of actions from the current node."""
    a = np.zeros((len(node_ids), self.task_params.num_actions),
                 dtype=np.int32)
    gtG = self.task.gtG
    next_node = []
    for i, c in enumerate(node_ids):
      neigh = gtG.vertex(c).out_neighbours()
      neigh_edge = gtG.vertex(c).out_edges()
      nn = {}
      for n, e in zip(neigh, neigh_edge):
        _ = gtG.ep['action'][e]
        a[i, _] = 1
        nn[_] = int(n)
      next_node.append(nn)
    return a, next_node

  def take_action(self, current_node_ids, action):
    """Returns the new node after taking the given action. Stays at the
    current node if the action is invalid."""
    actions, next_node_ids = self.get_feasible_actions(current_node_ids)
    new_node_ids = []
    for i, (c, a) in enumerate(zip(current_node_ids, action)):
      if actions[i, a] == 1:
        new_node_ids.append(next_node_ids[i][a])
      else:
        new_node_ids.append(c)
    return new_node_ids

  def set_r_obj(self, r_obj):
    """Sets the SwiftshaderRenderer object used for rendering."""
    self.r_obj = r_obj

class Building(GridWorld):
  def __init__(self, building_name, robot, env, category_list=None,
               small=False, flip=False, logdir=None, building_loader=None):
    self.restrict_to_largest_cc = True
    self.robot = robot
    self.env = env
    self.logdir = logdir

    # Load the building meta data.
    building = building_loader.load_building(building_name)
    if small:
      building['mesh_names'] = building['mesh_names'][:5]

    # New code.
    shapess = building_loader.load_building_meshes(building)
    if flip:
      for shapes in shapess:
        shapes.flip_shape()

    vs = []
    for shapes in shapess:
      vs.append(shapes.get_vertices()[0])
    vs = np.concatenate(vs, axis=0)
    map = make_map(env.padding, env.resolution, vertex=vs, sc=100.)
    map = compute_traversibility(
        map, robot.base, robot.height, robot.radius, env.valid_min,
        env.valid_max, env.num_point_threshold, shapess=shapess, sc=100.,
        n_samples_per_face=env.n_samples_per_face)

    room_dims = _get_room_dimensions(building['room_dimension_file'],
                                     env.resolution, map.origin, flip=flip)
    class_maps, class_map_names = _get_semantic_maps(
        building['class_map_folder'], building_name, map, flip)

    self.class_maps = class_maps
    self.class_map_names = class_map_names
    self.building = building
    self.shapess = shapess
    self.map = map
    self.traversible = map.traversible * 1
    self.building_name = building_name
    self.room_dims = room_dims
    self.flipped = flip
    self.renderer_entitiy_ids = []
    if self.restrict_to_largest_cc:
      self.traversible = pick_largest_cc(self.traversible)

  def load_building_into_scene(self):
    # Loads the scene.
    self.renderer_entitiy_ids += self.r_obj.load_shapes(self.shapess)
    # Free up memory, we don't need the mesh or the materials anymore.
    self.shapess = None

  def add_entity_at_nodes(self, nodes, height, shape):
    xyt = self.to_actual_xyt_vec(nodes)
    nxy = xyt[:, :2] * 1.
    nxy = nxy * self.map.resolution
    nxy = nxy + self.map.origin
    Ts = np.concatenate((nxy, nxy[:, :1]), axis=1)
    Ts[:, 2] = height
    Ts = Ts / 100.

    # Merge all the shapes into a single shape and add that shape.
    shape.replicate_shape(Ts)
    entity_ids = self.r_obj.load_shapes([shape])
    self.renderer_entitiy_ids += entity_ids
    return entity_ids

  def add_shapes(self, shapes):
    scene = self.r_obj.viz.scene()
    for shape in shapes:
      scene.AddShape(shape)

  def add_materials(self, materials):
    scene = self.r_obj.viz.scene()
    for material in materials:
      scene.AddOrUpdateMaterial(material)

  def set_building_visibility(self, visibility):
    self.r_obj.set_entity_visible(self.renderer_entitiy_ids, visibility)

  def render_nodes(self, nodes, perturb=None, aux_delta_theta=0.):
    self.set_building_visibility(True)
    if perturb is None:
      perturb = np.zeros((len(nodes), 4))

    imgs = []
    r = 2
    elevation_z = r * np.tan(np.deg2rad(self.robot.camera_elevation_degree))

    for i in range(len(nodes)):
      xyt = self.to_actual_xyt(nodes[i])
      lookat_theta = 3.0 * np.pi / 2.0 - (xyt[2] + perturb[i, 2] +
                                          aux_delta_theta) * (self.task.delta_theta)
      nxy = np.array([xyt[0] + perturb[i, 0],
                      xyt[1] + perturb[i, 1]]).reshape(1, -1)
      nxy = nxy * self.map.resolution
      nxy = nxy + self.map.origin
      camera_xyz = np.zeros((1, 3))
      camera_xyz[...] = [nxy[0, 0], nxy[0, 1], self.robot.sensor_height]
      camera_xyz = camera_xyz / 100.
      lookat_xyz = np.array([-r * np.sin(lookat_theta),
                             -r * np.cos(lookat_theta), elevation_z])
      lookat_xyz = lookat_xyz + camera_xyz[0, :]
      self.r_obj.position_camera(camera_xyz[0, :].tolist(),
                                 lookat_xyz.tolist(), [0.0, 0.0, 1.0])
      img = self.r_obj.render(take_screenshot=True, output_type=0)
      img = [x for x in img if x is not None]
      img = np.concatenate(img, axis=2).astype(np.float32)
      if perturb[i, 3] > 0:
        img = img[:, ::-1, :]
      imgs.append(img)

    self.set_building_visibility(False)
    return imgs

class MeshMapper(Building):
  def __init__(self, robot, env, task_params, building_name, category_list,
               flip, logdir=None, building_loader=None):
    Building.__init__(self, building_name, robot, env, category_list,
                      small=task_params.toy_problem, flip=flip, logdir=logdir,
                      building_loader=building_loader)
    self.task_params = task_params
    self.task = None
    self._preprocess_for_task(self.task_params.building_seed)

  def _preprocess_for_task(self, seed):
    if self.task is None or self.task.seed != seed:
      rng = np.random.RandomState(seed)
      origin_loc = get_graph_origin_loc(rng, self.traversible)
      self.task = utils.Foo(seed=seed, origin_loc=origin_loc,
                            n_ori=self.task_params.n_ori)
      G = generate_graph(self.valid_fn_vec, self.task_params.step_size,
                         self.task.n_ori, (0, 0, 0))
      gtG, nodes, nodes_to_id = convert_to_graph_tool(G)
      self.task.gtG = gtG
      self.task.nodes = nodes
      self.task.delta_theta = 2.0 * np.pi / (self.task.n_ori * 1.)
      self.task.nodes_to_id = nodes_to_id
      logging.info('Building %s, #V=%d, #E=%d', self.building_name,
                   self.task.nodes.shape[0], self.task.gtG.num_edges())

      if self.logdir is not None:
        write_traversible = cv2.applyColorMap(
            self.traversible.astype(np.uint8) * 255, cv2.COLORMAP_JET)
        img_path = os.path.join(
            self.logdir,
            '{:s}_{:d}_graph.png'.format(self.building_name, seed))
        node_xyt = self.to_actual_xyt_vec(self.task.nodes)
        plt.set_cmap('jet')
        fig, ax = utils.subplot(plt, (1, 1), (12, 12))
        ax.plot(node_xyt[:, 0], node_xyt[:, 1], 'm.')
        ax.imshow(self.traversible, origin='lower')
        ax.set_axis_off()
        ax.axis('equal')
        ax.set_title('{:s}, {:d}, {:d}'.format(self.building_name,
                                               self.task.nodes.shape[0],
                                               self.task.gtG.num_edges()))
        if self.room_dims is not None:
          for i, r in enumerate(self.room_dims['dims'] * 1):
            min_ = r[:3] * 1
            max_ = r[3:] * 1
            xmin, ymin, zmin = min_
            xmax, ymax, zmax = max_
            ax.plot([xmin, xmax, xmax, xmin, xmin],
                    [ymin, ymin, ymax, ymax, ymin], 'g')
        with fu.fopen(img_path, 'w') as f:
          fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
        plt.close(fig)

  def _gen_rng(self, rng):
    # instances is a list of list of node_ids.
    if self.task_params.move_type == 'circle':
      _, _, _, _, paths = rng_target_dist_field(
          self.task_params.batch_size, self.task.gtG, rng, 0, 1,
          compute_path=True)
      instances_ = paths

      instances = []
      for instance_ in instances_:
        instance = instance_
        for i in range(self.task_params.num_steps):
          instance.append(self.take_action([instance[-1]], [1])[0])
        instances.append(instance)

    elif self.task_params.move_type == 'shortest_path':
      _, _, _, _, paths = rng_target_dist_field(
          self.task_params.batch_size, self.task.gtG, rng,
          self.task_params.num_steps, self.task_params.num_steps + 1,
          compute_path=True)
      instances = paths

    elif self.task_params.move_type == 'circle+forward':
      _, _, _, _, paths = rng_target_dist_field(
          self.task_params.batch_size, self.task.gtG, rng, 0, 1,
          compute_path=True)
      instances_ = paths

      instances = []
      for instance_ in instances_:
        instance = instance_
        for i in range(self.task_params.n_ori - 1):
          instance.append(self.take_action([instance[-1]], [1])[0])
        while len(instance) <= self.task_params.num_steps:
          while (self.take_action([instance[-1]], [3])[0] == instance[-1] and
                 len(instance) <= self.task_params.num_steps):
            instance.append(self.take_action([instance[-1]], [2])[0])
          if len(instance) <= self.task_params.num_steps:
            instance.append(self.take_action([instance[-1]], [3])[0])
        instances.append(instance)

    # Do random perturbation if needed.
    perturbs = _gen_perturbs(rng, self.task_params.batch_size,
                             self.task_params.num_steps,
                             self.task_params.data_augment.lr_flip,
                             self.task_params.data_augment.delta_angle,
                             self.task_params.data_augment.delta_xy,
                             self.task_params.data_augment.structured)
    return instances, perturbs

  def worker(self, instances, perturbs):
    # Output the images and the free space.
    # Make the instances be all the same length.
    for i in range(len(instances)):
      for j in range(self.task_params.num_steps - len(instances[i]) + 1):
        instances[i].append(instances[i][-1])
      if perturbs[i].shape[0] < self.task_params.num_steps + 1:
        p = np.zeros((self.task_params.num_steps + 1, 4))
        p[:perturbs[i].shape[0], :] = perturbs[i]
        p[perturbs[i].shape[0]:, :] = perturbs[i][-1, :]
        perturbs[i] = p

    instances_ = []
    for instance in instances:
      instances_ = instances_ + instance
    perturbs_ = np.concatenate(perturbs, axis=0)

    instances_nodes = self.task.nodes[instances_, :]
    instances_nodes = [tuple(x) for x in instances_nodes]

    imgs_ = self.render_nodes(instances_nodes, perturbs_)
    imgs = []
    next = 0
    for instance in instances:
      img_i = []
      for _ in instance:
        img_i.append(imgs_[next])
        next = next + 1
      imgs.append(img_i)
    imgs = np.array(imgs)

    # Render out the maps in the egocentric view for all nodes and not just
    # the last node.
    all_nodes = []
    for x in instances:
      all_nodes = all_nodes + x
    all_perturbs = np.concatenate(perturbs, axis=0)
    loc, x_axis, y_axis, theta = self.get_loc_axis(
        self.task.nodes[all_nodes, :] * 1,
        delta_theta=self.task.delta_theta, perturb=all_perturbs)

    fss = None
    valids = None
    loc_on_map = None
    theta_on_map = None
    cum_fs = None
    cum_valid = None
    incremental_locs = None
    incremental_thetas = None

    if self.task_params.output_free_space:
      fss, valids = get_map_to_predict(loc, x_axis, y_axis,
                                       map=self.traversible * 1.,
                                       map_size=self.task_params.map_size)
      fss = np.array(fss) > 0.5
      fss = np.reshape(fss, [self.task_params.batch_size,
                             self.task_params.num_steps + 1,
                             self.task_params.map_size,
                             self.task_params.map_size])
      valids = np.reshape(np.array(valids), fss.shape)

    if self.task_params.output_transform_to_global_map:
      # Output the transform to the global map.
      loc_on_map = np.reshape(loc * 1, [self.task_params.batch_size,
                                        self.task_params.num_steps + 1, -1])
      # Converting to location wrt to first location so that warping happens
      # properly.
      theta_on_map = np.reshape(theta * 1, [self.task_params.batch_size,
                                            self.task_params.num_steps + 1,
                                            -1])

    if self.task_params.output_incremental_transform:
      # Output the transform to the global map.
      incremental_locs_ = np.reshape(loc * 1,
                                     [self.task_params.batch_size,
                                      self.task_params.num_steps + 1, -1])
      incremental_locs_[:, 1:, :] -= incremental_locs_[:, :-1, :]
      t0 = -np.pi / 2 + np.reshape(theta * 1,
                                   [self.task_params.batch_size,
                                    self.task_params.num_steps + 1, -1])
      t = t0 * 1
      incremental_locs = incremental_locs_ * 1
      incremental_locs[:, :, 0] = np.sum(
          incremental_locs_ * np.concatenate((np.cos(t), np.sin(t)),
                                             axis=-1), axis=-1)
      incremental_locs[:, :, 1] = np.sum(
          incremental_locs_ * np.concatenate((np.cos(t + np.pi / 2),
                                              np.sin(t + np.pi / 2)),
                                             axis=-1), axis=-1)
      incremental_locs[:, 0, :] = incremental_locs_[:, 0, :]
      # print incremental_locs_[0,:,:], incremental_locs[0,:,:], t0[0,:,:]

      incremental_thetas = np.reshape(theta * 1,
                                      [self.task_params.batch_size,
                                       self.task_params.num_steps + 1, -1])
      incremental_thetas[:, 1:, :] += -incremental_thetas[:, :-1, :]

    if self.task_params.output_canonical_map:
      loc_ = loc[0::(self.task_params.num_steps + 1), :]
      x_axis = np.zeros_like(loc_)
      x_axis[:, 1] = 1
      y_axis = np.zeros_like(loc_)
      y_axis[:, 0] = -1
      cum_fs, cum_valid = get_map_to_predict(
          loc_, x_axis, y_axis, map=self.traversible * 1.,
          map_size=self.task_params.map_size)
      cum_fs = np.array(cum_fs) > 0.5
      cum_fs = np.reshape(cum_fs, [self.task_params.batch_size, 1,
                                   self.task_params.map_size,
                                   self.task_params.map_size])
      cum_valid = np.reshape(np.array(cum_valid), cum_fs.shape)

    inputs = {'fs_maps': fss,
              'valid_maps': valids,
              'imgs': imgs,
              'loc_on_map': loc_on_map,
              'theta_on_map': theta_on_map,
              'cum_fs_maps': cum_fs,
              'cum_valid_maps': cum_valid,
              'incremental_thetas': incremental_thetas,
              'incremental_locs': incremental_locs}
    return inputs

  def pre(self, inputs):
    inputs['imgs'] = image_pre(inputs['imgs'], self.task_params.modalities)
    if inputs['loc_on_map'] is not None:
      inputs['loc_on_map'] = \
          inputs['loc_on_map'] - inputs['loc_on_map'][:, [0], :]
    if inputs['theta_on_map'] is not None:
      inputs['theta_on_map'] = np.pi / 2. - inputs['theta_on_map']
    return inputs

def _nav_env_reset_helper(type, rng, nodes, batch_size, gtG, max_dist,
                          num_steps, num_goals, data_augment, **kwargs):
  """Generates and returns a new episode."""
  max_compute = max_dist + 4 * num_steps
  if type == 'general':
    start_node_ids, end_node_ids, dist, pred_map, paths = \
        rng_target_dist_field(batch_size, gtG, rng, max_dist, max_compute,
                              nodes=nodes, compute_path=False)
    target_class = None

  elif type == 'room_to_room_many':
    goal_node_ids = []
    dists = []
    node_room_ids = kwargs['node_room_ids']
    # Sample the first one.
    start_node_ids_, end_node_ids_, dist_, _, _ = rng_room_to_room(
        batch_size, gtG, rng, max_dist, max_compute,
        node_room_ids=node_room_ids, nodes=nodes)
    start_node_ids = start_node_ids_
    goal_node_ids.append(end_node_ids_)
    dists.append(dist_)
    for n in range(num_goals - 1):
      start_node_ids_, end_node_ids_, dist_, _, _ = rng_next_goal(
          goal_node_ids[n], batch_size, gtG, rng, max_dist, max_compute,
          node_room_ids=node_room_ids, nodes=nodes,
          dists_from_start_node=dists[n])
      goal_node_ids.append(end_node_ids_)
      dists.append(dist_)
    target_class = None

  elif type == 'rng_rejection_sampling_many':
    num_goals = num_goals
    goal_node_ids = []
    dists = []

    n_ori = kwargs['n_ori']
    step_size = kwargs['step_size']
    min_dist = kwargs['min_dist']
    sampling_distribution = kwargs['sampling_distribution']
    target_distribution = kwargs['target_distribution']
    rejection_sampling_M = kwargs['rejection_sampling_M']
    distribution_bins = kwargs['distribution_bins']

    for n in range(num_goals):
      if n == 0:
        input_nodes = None
      else:
        input_nodes = goal_node_ids[n - 1]
      start_node_ids_, end_node_ids_, dist_, _, _, _, _ = \
          rng_next_goal_rejection_sampling(
              input_nodes, batch_size, gtG, rng, max_dist, min_dist,
              max_compute, sampling_distribution, target_distribution,
              nodes, n_ori, step_size, distribution_bins,
              rejection_sampling_M)
      if n == 0:
        start_node_ids = start_node_ids_
      goal_node_ids.append(end_node_ids_)
      dists.append(dist_)
    target_class = None

  elif type == 'room_to_room_back':
    num_goals = num_goals
    assert(num_goals == 2), 'num_goals must be 2.'
    goal_node_ids = []
    dists = []
    node_room_ids = kwargs['node_room_ids']
    # Sample the first one.
    start_node_ids_, end_node_ids_, dist_, _, _ = rng_room_to_room(
        batch_size, gtG, rng, max_dist, max_compute,
        node_room_ids=node_room_ids, nodes=nodes)
    start_node_ids = start_node_ids_
    goal_node_ids.append(end_node_ids_)
    dists.append(dist_)

    # Set second goal to be starting position, and compute distance to the
    # start node.
    goal_node_ids.append(start_node_ids)
    dist = []
    for i in range(batch_size):
      dist_ = gt.topology.shortest_distance(
          gt.GraphView(gtG, reversed=True),
          source=gtG.vertex(start_node_ids[i]), target=None)
      dist_ = np.array(dist_.get_array())
      dist.append(dist_)
    dists.append(dist)
    target_class = None

  elif type[:14] == 'to_nearest_obj':
    # Generate an episode by sampling one of the target classes (with
    # probability proportional to the number of nodes in the world).
    # With the sampled class sample a node that is within some distance from
    # the sampled class.
    class_nodes = kwargs['class_nodes']
    sampling = kwargs['sampling']
    dist_to_class = kwargs['dist_to_class']

    assert(num_goals == 1), 'Only supports a single goal.'

    ind = rng.choice(class_nodes.shape[0], size=batch_size)
    target_class = class_nodes[ind, 1]

    start_node_ids = []
    dists = []
    goal_node_ids = []

    for t in target_class:
      if sampling == 'uniform':
        max_dist = max_dist
        cnts = np.bincount(dist_to_class[t], minlength=max_dist + 1) * 1.
        cnts[max_dist + 1:] = 0
        p_each = 1. / cnts / (max_dist + 1.)
        p_each[cnts == 0] = 0
        p = p_each[dist_to_class[t]] * 1.
        p = p / np.sum(p)
        start_node_id = rng.choice(p.shape[0], size=1, p=p)[0]
      else:
        logging.fatal('Sampling not one of uniform.')
      start_node_ids.append(start_node_id)
      dists.append(dist_to_class[t])
      # Dummy goal node, same as the start node, so that vis is better.
      goal_node_ids.append(start_node_id)
    dists = [dists]
    goal_node_ids = [goal_node_ids]

  return start_node_ids, goal_node_ids, dists, target_class

class NavigationEnv(GridWorld, Building):
  """Wrapper around GridWorld which sets up navigation tasks.
  """
  def _debug_save_hardness(self, seed):
    out_path = os.path.join(
        self.logdir, '{:s}_{:d}_hardness.png'.format(self.building_name,
                                                     seed))
    batch_size = 4000
    rng = np.random.RandomState(0)
    start_node_ids, end_node_ids, dists, pred_maps, paths, hardnesss, gt_dists = \
        rng_next_goal_rejection_sampling(
            None, batch_size, self.task.gtG, rng, self.task_params.max_dist,
            self.task_params.min_dist, self.task_params.max_dist,
            self.task.sampling_distribution, self.task.target_distribution,
            self.task.nodes, self.task_params.n_ori,
            self.task_params.step_size, self.task.distribution_bins,
            self.task.rejection_sampling_M)
    bins = self.task.distribution_bins
    n_bins = self.task.n_bins
    with plt.style.context('ggplot'):
      fig, axes = utils.subplot(plt, (1, 2), (10, 10))
      ax = axes[0]
      _ = ax.hist(hardnesss, bins=bins,
                  weights=np.ones_like(hardnesss) / len(hardnesss))
      ax.plot(bins[:-1] + 0.5 / n_bins, self.task.target_distribution, 'g')
      ax.plot(bins[:-1] + 0.5 / n_bins, self.task.sampling_distribution, 'b')
      ax.grid('on')

      ax = axes[1]
      _ = ax.hist(gt_dists, bins=np.arange(self.task_params.max_dist + 1))
      ax.grid('on')
      ax.set_title('Mean: {:0.2f}, Median: {:0.2f}'.format(
          np.mean(gt_dists), np.median(gt_dists)))
    with fu.fopen(out_path, 'w') as f:
      fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)

  def _debug_save_map_nodes(self, seed):
    """Saves traversible space along with nodes generated on the graph. Takes
    the seed as input."""
    img_path = os.path.join(
        self.logdir, '{:s}_{:d}_graph.png'.format(self.building_name, seed))
    node_xyt = self.to_actual_xyt_vec(self.task.nodes)
    plt.set_cmap('jet')
    fig, ax = utils.subplot(plt, (1, 1), (12, 12))
    ax.plot(node_xyt[:, 0], node_xyt[:, 1], 'm.')
    ax.set_axis_off()
    ax.axis('equal')

    if self.room_dims is not None:
      for i, r in enumerate(self.room_dims['dims'] * 1):
        min_ = r[:3] * 1
        max_ = r[3:] * 1
        xmin, ymin, zmin = min_
        xmax, ymax, zmax = max_
        ax.plot([xmin, xmax, xmax, xmin, xmin],
                [ymin, ymin, ymax, ymax, ymin], 'g')
    ax.imshow(self.traversible, origin='lower')
    with fu.fopen(img_path, 'w') as f:
      fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)

  def _debug_semantic_maps(self, seed):
    """Saves traversible space overlaid with the dilated semantic class maps.
    Takes the seed as input."""
    for i, cls in enumerate(self.task_params.semantic_task.class_map_names):
      img_path = os.path.join(
          self.logdir, '{:s}_flip{:d}_{:s}_graph.png'.format(
              self.building_name, seed, cls))
      maps = self.traversible * 1.
      maps += 0.5 * (self.task.class_maps_dilated[:, :, i])
      write_traversible = (maps * 1. + 1.) / 3.0
      write_traversible = \
          (write_traversible * 255.).astype(np.uint8)[:, :, np.newaxis]
      write_traversible = \
          write_traversible + np.zeros((1, 1, 3), dtype=np.uint8)
      fu.write_image(img_path, write_traversible[::-1, :, :])

  def _preprocess_for_task(self, seed):
    """Sets up the task field for doing navigation on the grid world."""
    if self.task is None or self.task.seed != seed:
      rng = np.random.RandomState(seed)
      origin_loc = get_graph_origin_loc(rng, self.traversible)
      self.task = utils.Foo(seed=seed, origin_loc=origin_loc,
                            n_ori=self.task_params.n_ori)
      G = generate_graph(self.valid_fn_vec, self.task_params.step_size,
                         self.task.n_ori, (0, 0, 0))
      gtG, nodes, nodes_to_id = convert_to_graph_tool(G)
      self.task.gtG = gtG
      self.task.nodes = nodes
      self.task.delta_theta = 2.0 * np.pi / (self.task.n_ori * 1.)
      self.task.nodes_to_id = nodes_to_id

      logging.info('Building %s, #V=%d, #E=%d', self.building_name,
                   self.task.nodes.shape[0], self.task.gtG.num_edges())
      type = self.task_params.type

      if type == 'general':
        # Do nothing.
        _ = None

      elif type == 'room_to_room_many' or type == 'room_to_room_back':
        if type == 'room_to_room_back':
          assert(self.task_params.num_goals == 2), 'num_goals must be 2.'
        self.room_dims = _filter_rooms(self.room_dims,
                                       self.task_params.room_regex)
        xyt = self.to_actual_xyt_vec(self.task.nodes)
        self.task.node_room_ids = _label_nodes_with_room_id(xyt,
                                                            self.room_dims)
        self.task.reset_kwargs = {'node_room_ids': self.task.node_room_ids}

      elif type == 'rng_rejection_sampling_many':
        n_bins = 20
        rejection_sampling_M = self.task_params.rejection_sampling_M
        min_dist = self.task_params.min_dist
        bins = np.arange(n_bins + 1) / (n_bins * 1.)
        target_d = np.zeros(n_bins)
        target_d[...] = 1. / n_bins
        sampling_d = get_hardness_distribution(
            self.task.gtG, self.task_params.max_dist,
            self.task_params.min_dist, np.random.RandomState(0), 4000, bins,
            self.task.nodes, self.task_params.n_ori,
            self.task_params.step_size)
        self.task.reset_kwargs = {'distribution_bins': bins,
                                  'target_distribution': target_d,
                                  'sampling_distribution': sampling_d,
                                  'rejection_sampling_M': rejection_sampling_M,
                                  'n_bins': n_bins,
                                  'n_ori': self.task_params.n_ori,
                                  'step_size': self.task_params.step_size,
                                  'min_dist': self.task_params.min_dist}
        self.task.n_bins = n_bins
        self.task.distribution_bins = bins
        self.task.target_distribution = target_d
        self.task.sampling_distribution = sampling_d
        self.task.rejection_sampling_M = rejection_sampling_M
        if self.logdir is not None:
          self._debug_save_hardness(seed)

      elif type[:14] == 'to_nearest_obj':
        self.room_dims = _filter_rooms(self.room_dims,
                                       self.task_params.room_regex)
        xyt = self.to_actual_xyt_vec(self.task.nodes)
        self.class_maps = _select_classes(
            self.class_maps, self.class_map_names,
            self.task_params.semantic_task.class_map_names) * 1
        self.class_map_names = self.task_params.semantic_task.class_map_names
        nodes_xyt = self.to_actual_xyt_vec(np.array(self.task.nodes))

        tt = utils.Timer()
        tt.tic()
        if self.task_params.type == 'to_nearest_obj_acc':
          self.task.class_maps_dilated, self.task.node_class_label = \
              label_nodes_with_class_geodesic(
                  nodes_xyt, self.class_maps,
                  self.task_params.semantic_task.pix_distance + 8,
                  self.map.traversible, ff_cost=1., fo_cost=1., oo_cost=4.,
                  connectivity=8.)

        dists = []
        for i in range(len(self.class_map_names)):
          class_nodes_ = np.where(self.task.node_class_label[:, i])[0]
          dists.append(get_distance_node_list(gtG, source_nodes=class_nodes_,
                                              direction='to'))
        self.task.dist_to_class = dists
        a_, b_ = np.where(self.task.node_class_label)
        self.task.class_nodes = np.concatenate(
            (a_[:, np.newaxis], b_[:, np.newaxis]), axis=1)

        if self.logdir is not None:
          self._debug_semantic_maps(seed)

        self.task.reset_kwargs = {
            'sampling': self.task_params.semantic_task.sampling,
            'class_nodes': self.task.class_nodes,
            'dist_to_class': self.task.dist_to_class}

      if self.logdir is not None:
        self._debug_save_map_nodes(seed)
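The 'rng_rejection_sampling_many' branch hands the episode sampler a flat target distribution over 20 hardness bins and an empirical proposal distribution. A minimal sketch of classic rejection sampling with these ingredients (sample_fn and hardness_fn are hypothetical stand-ins for the repo's actual episode sampler):

import numpy as np

def rejection_sample(rng, sample_fn, hardness_fn, bins, target_d, sampling_d, M):
  # Accept a candidate with probability target/(M * proposal) for its hardness
  # bin; accepted samples then follow the flat target distribution.
  while True:
    cand = sample_fn(rng)
    b = np.clip(np.digitize(hardness_fn(cand), bins) - 1, 0, len(target_d) - 1)
    if rng.rand() < target_d[b] / (M * sampling_d[b]):
      return cand

rng = np.random.RandomState(0)
bins = np.arange(21) / 20.
target_d = np.ones(20) / 20.     # flat target, as in target_d above
sampling_d = np.ones(20) / 20.   # in the real code: get_hardness_distribution
ep = rejection_sample(rng, lambda r: r.rand(), lambda c: c, bins,
                      target_d, sampling_d, M=100)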
  def reset(self, rngs):
    rng = rngs[0]
    rng_perturb = rngs[1]
    nodes = self.task.nodes
    tp = self.task_params
    start_node_ids, goal_node_ids, dists, target_class = \
        _nav_env_reset_helper(tp.type, rng, self.task.nodes, tp.batch_size,
                              self.task.gtG, tp.max_dist, tp.num_steps,
                              tp.num_goals, tp.data_augment,
                              **(self.task.reset_kwargs))
    start_nodes = [tuple(nodes[_, :]) for _ in start_node_ids]
    goal_nodes = [[tuple(nodes[_, :]) for _ in __] for __ in goal_node_ids]

    data_augment = tp.data_augment
    perturbs = _gen_perturbs(rng_perturb, tp.batch_size,
                             (tp.num_steps + 1) * tp.num_goals,
                             data_augment.lr_flip, data_augment.delta_angle,
                             data_augment.delta_xy, data_augment.structured)
    perturbs = np.array(perturbs)  # batch x steps x 4
    end_perturbs = perturbs[:, -(tp.num_goals):, :] * 1  # fixed perturb for the goal.
    perturbs = perturbs[:, :-(tp.num_goals), :] * 1

    history = -np.ones((tp.batch_size, tp.num_steps * tp.num_goals),
                       dtype=np.int32)
    self.episode = utils.Foo(
        start_nodes=start_nodes, start_node_ids=start_node_ids,
        goal_nodes=goal_nodes, goal_node_ids=goal_node_ids,
        dist_to_goal=dists, perturbs=perturbs, goal_perturbs=end_perturbs,
        history=history, target_class=target_class, history_frames=[])
    return start_node_ids
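The perturbation tensor generated above carries (num_steps + 1) * num_goals rows per episode; the trailing num_goals rows are reserved for rendering the goals and the rest perturb individual steps. A shape-only sketch of that split, using the base-config values as assumptions:

import numpy as np

batch_size, num_steps, num_goals = 4, 40, 1
perturbs = np.zeros((batch_size, (num_steps + 1) * num_goals, 4))
goal_perturbs = perturbs[:, -num_goals:, :] * 1   # fixed perturb per goal
step_perturbs = perturbs[:, :-num_goals, :] * 1   # one perturb per step
print(step_perturbs.shape, goal_perturbs.shape)   # (4, 40, 4) (4, 1, 4)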
  def take_action(self, current_node_ids, action, step_number):
    """Takes the action, and returns the new node ids along with the reward
    that the agent receives."""
    # Integer division so goal_number stays a valid index under Python 3.
    goal_number = step_number // self.task_params.num_steps
    new_node_ids = GridWorld.take_action(self, current_node_ids, action)
    rewards = []
    for i, n in enumerate(new_node_ids):
      reward = 0
      if n == self.episode.goal_node_ids[goal_number][i]:
        reward = self.task_params.reward_at_goal
      reward = reward - self.task_params.reward_time_penalty
      rewards.append(reward)
    return new_node_ids, rewards
  def get_optimal_action(self, current_node_ids, step_number):
    """Returns the optimal action from the current node."""
    goal_number = step_number // self.task_params.num_steps
    gtG = self.task.gtG
    a = np.zeros((len(current_node_ids), self.task_params.num_actions),
                 dtype=np.int32)
    d_dict = self.episode.dist_to_goal[goal_number]
    for i, c in enumerate(current_node_ids):
      neigh = gtG.vertex(c).out_neighbours()
      neigh_edge = gtG.vertex(c).out_edges()
      ds = np.array([d_dict[i][int(x)] for x in neigh])
      ds_min = np.min(ds)
      for i_, e in enumerate(neigh_edge):
        if ds[i_] == ds_min:
          _ = gtG.ep['action'][e]
          a[i, _] = 1
    return a
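Taken together, reset(), get_optimal_action() and take_action() are enough to roll out the shortest-path expert. A hypothetical driver loop, assuming GridWorld.take_action accepts the argmax index of the one-hot output (ties between equally good actions resolve to the first):

import numpy as np

def expert_rollout(env, num_steps, seed=0):
  rngs = [np.random.RandomState(seed), np.random.RandomState(seed + 1)]
  node_ids = env.reset(rngs)
  total_reward = np.zeros(len(node_ids))
  for step in range(num_steps):
    a = env.get_optimal_action(node_ids, step)   # B x num_actions, one-hot
    node_ids, rewards = env.take_action(node_ids, np.argmax(a, axis=1), step)
    total_reward += np.array(rewards)
  return total_reward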
  def get_targets(self, current_node_ids, step_number):
    """Returns the target actions from the current node."""
    action = self.get_optimal_action(current_node_ids, step_number)
    action = np.expand_dims(action, axis=1)
    return vars(utils.Foo(action=action))
  def get_targets_name(self):
    """Returns the list of names of the targets."""
    return ['action']

  def cleanup(self):
    self.episode = None
class VisualNavigationEnv(NavigationEnv):
  """Class for doing visual navigation in environments. Functions for
  computing features on states, etc.
  """
  def __init__(self, robot, env, task_params, category_list=None,
               building_name=None, flip=False, logdir=None,
               building_loader=None, r_obj=None):
    tt = utils.Timer()
    tt.tic()
    Building.__init__(self, building_name, robot, env, category_list,
                      small=task_params.toy_problem, flip=flip, logdir=logdir,
                      building_loader=building_loader)
    self.set_r_obj(r_obj)
    self.task_params = task_params
    self.task = None
    self.episode = None
    self._preprocess_for_task(self.task_params.building_seed)
    if hasattr(self.task_params, 'map_scales'):
      self.task.scaled_maps = resize_maps(
          self.traversible.astype(np.float32) * 1,
          self.task_params.map_scales, self.task_params.map_resize_method)
    else:
      logging.fatal('VisualNavigationEnv does not support scale_f anymore.')
    self.task.readout_maps_scaled = resize_maps(
        self.traversible.astype(np.float32) * 1,
        self.task_params.readout_maps_scales,
        self.task_params.map_resize_method)
    tt.toc(log_at=1, log_str='VisualNavigationEnv __init__: ')
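resize_maps produces one float map per entry in map_scales, which the mapper consumes as a multi-scale pyramid. A rough stand-in using scipy (assumed available; 'linear_noantialiasing' is treated here as plain bilinear interpolation with no prefilter):

import numpy as np
from scipy.ndimage import zoom

def resize_maps_sketch(traversible, scales):
  # One resized float map per scale; order=1 is bilinear interpolation.
  return [zoom(traversible.astype(np.float32), s, order=1) for s in scales]

maps = resize_maps_sketch(np.ones((200, 300), dtype=bool), [1., 0.5, 0.25])
print([m.shape for m in maps])  # [(200, 300), (100, 150), (50, 75)]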
  def get_weight(self):
    return self.task.nodes.shape[0]
  def get_common_data(self):
    goal_nodes = self.episode.goal_nodes
    start_nodes = self.episode.start_nodes
    perturbs = self.episode.perturbs
    goal_perturbs = self.episode.goal_perturbs
    target_class = self.episode.target_class

    goal_locs = []
    rel_goal_locs = []
    for i in range(len(goal_nodes)):
      end_nodes = goal_nodes[i]
      goal_loc, _, _, goal_theta = self.get_loc_axis(
          np.array(end_nodes), delta_theta=self.task.delta_theta,
          perturb=goal_perturbs[:, i, :])

      # Compute the relative location to all goals from the starting location.
      loc, _, _, theta = self.get_loc_axis(np.array(start_nodes),
                                           delta_theta=self.task.delta_theta,
                                           perturb=perturbs[:, 0, :])
      r_goal, t_goal = _get_relative_goal_loc(goal_loc * 1., loc, theta)
      rel_goal_loc = np.concatenate((r_goal * np.cos(t_goal),
                                     r_goal * np.sin(t_goal),
                                     np.cos(goal_theta - theta),
                                     np.sin(goal_theta - theta)), axis=1)
      rel_goal_locs.append(np.expand_dims(rel_goal_loc, axis=1))
      goal_locs.append(np.expand_dims(goal_loc, axis=1))

    map = self.traversible * 1.
    maps = np.repeat(np.expand_dims(np.expand_dims(map, axis=0), axis=0),
                     self.task_params.batch_size, axis=0) * 1
    if self.task_params.type[:14] == 'to_nearest_obj':
      for i in range(self.task_params.batch_size):
        maps[i, 0, :, :] += 0.5 * (
            self.task.class_maps_dilated[:, :, target_class[i]])

    rel_goal_locs = np.concatenate(rel_goal_locs, axis=1)
    goal_locs = np.concatenate(goal_locs, axis=1)
    maps = np.expand_dims(maps, axis=-1)

    if self.task_params.type[:14] == 'to_nearest_obj':
      rel_goal_locs = np.zeros(
          (self.task_params.batch_size, 1,
           len(self.task_params.semantic_task.class_map_names)),
          dtype=np.float32)
      goal_locs = np.zeros((self.task_params.batch_size, 1, 2),
                           dtype=np.float32)
      for i in range(self.task_params.batch_size):
        t = target_class[i]
        rel_goal_locs[i, 0, t] = 1.
        goal_locs[i, 0, 0] = t
        goal_locs[i, 0, 1] = np.NaN

    return vars(utils.Foo(orig_maps=maps, goal_loc=goal_locs,
                          rel_goal_loc_at_start=rel_goal_locs))
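The 4-vector built from _get_relative_goal_loc encodes the goal as (r cos t, r sin t, cos dtheta, sin dtheta) in the robot's frame. A small sketch of the underlying geometry, assuming _get_relative_goal_loc returns range and egocentric bearing:

import numpy as np

def relative_goal(goal_loc, loc, theta):
  # Range r and bearing t of the goal in the frame of a robot at `loc`
  # with heading `theta` (all arrays are batched along axis 0).
  d = goal_loc - loc
  r = np.linalg.norm(d, axis=1, keepdims=True)
  t = np.arctan2(d[:, 1:2], d[:, 0:1]) - theta
  return r, t

r, t = relative_goal(np.array([[10., 0.]]), np.array([[0., 0.]]),
                     np.array([[np.pi / 2]]))
print(r, t)  # r = 10, t = -pi/2: the goal is 10 units to the robot's right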
  def pre_common_data(self, inputs):
    return inputs
  def get_features(self, current_node_ids, step_number):
    task_params = self.task_params
    goal_number = step_number // self.task_params.num_steps
    end_nodes = self.task.nodes[
        self.episode.goal_node_ids[goal_number], :] * 1
    current_nodes = self.task.nodes[current_node_ids, :] * 1
    end_perturbs = self.episode.goal_perturbs[:, goal_number, :][:,
                                                                 np.newaxis, :]
    perturbs = self.episode.perturbs
    target_class = self.episode.target_class

    # Append to history.
    self.episode.history[:, step_number] = np.array(current_node_ids)

    # Render out the images from current node.
    outs = {}

    if self.task_params.outputs.images:
      imgs_all = []
      imgs = self.render_nodes([tuple(x) for x in current_nodes],
                               perturb=perturbs[:, step_number, :])
      imgs_all.append(imgs)
      aux_delta_thetas = self.task_params.aux_delta_thetas
      for i in range(len(aux_delta_thetas)):
        imgs = self.render_nodes([tuple(x) for x in current_nodes],
                                 perturb=perturbs[:, step_number, :],
                                 aux_delta_theta=aux_delta_thetas[i])
        imgs_all.append(imgs)
      imgs_all = np.array(imgs_all)  # A x B x H x W x C
      imgs_all = np.transpose(imgs_all, axes=[1, 0, 2, 3, 4])
      imgs_all = np.expand_dims(imgs_all, axis=1)  # B x N x A x H x W x C

      if task_params.num_history_frames > 0:
        if step_number == 0:
          # Seed the history by repeating the same frame
          # num_history_frames + 1 times.
          for i in range(task_params.num_history_frames + 1):
            self.episode.history_frames.insert(0, imgs_all * 1.)
        self.episode.history_frames.insert(0, imgs_all)
        self.episode.history_frames.pop()
        imgs_all_with_history = np.concatenate(self.episode.history_frames,
                                               axis=2)
      else:
        imgs_all_with_history = imgs_all
      outs['imgs'] = imgs_all_with_history  # B x N x A x H x W x C

    if self.task_params.outputs.node_ids:
      outs['node_ids'] = np.array(current_node_ids).reshape((-1, 1, 1))
      outs['perturbs'] = np.expand_dims(perturbs[:, step_number, :] * 1.,
                                        axis=1)

    if self.task_params.outputs.analytical_counts:
      assert(self.task_params.modalities == ['depth'])
      d = image_pre(outs['imgs'] * 1., self.task_params.modalities)
      cm = get_camera_matrix(self.task_params.img_width,
                             self.task_params.img_height,
                             self.task_params.img_fov)
      XYZ = get_point_cloud_from_z(100. / d[..., 0], cm)
      XYZ = make_geocentric(XYZ * 100., self.robot.sensor_height,
                            self.robot.camera_elevation_degree)
      for i in range(len(self.task_params.analytical_counts.map_sizes)):
        non_linearity = self.task_params.analytical_counts.non_linearity[i]
        count, isvalid = bin_points(
            XYZ * 1.,
            map_size=self.task_params.analytical_counts.map_sizes[i],
            xy_resolution=self.task_params.analytical_counts.xy_resolution[i],
            z_bins=self.task_params.analytical_counts.z_bins[i])
        assert(count.shape[2] == 1), 'only works for n_views equal to 1.'
        count = count[:, :, 0, :, :, :]
        isvalid = isvalid[:, :, 0, :, :, :]
        if non_linearity == 'none':
          pass
        elif non_linearity == 'min10':
          count = np.minimum(count, 10.)
        elif non_linearity == 'sqrt':
          count = np.sqrt(count)
        else:
          logging.fatal('Undefined non_linearity.')
        outs['analytical_counts_{:d}'.format(i)] = count

    # Compute the goal location in the coordinate frame of the robot.
    if self.task_params.outputs.rel_goal_loc:
      if self.task_params.type[:14] != 'to_nearest_obj':
        loc, _, _, theta = self.get_loc_axis(
            current_nodes, delta_theta=self.task.delta_theta,
            perturb=perturbs[:, step_number, :])
        goal_loc, _, _, goal_theta = self.get_loc_axis(
            end_nodes, delta_theta=self.task.delta_theta,
            perturb=end_perturbs[:, 0, :])
        r_goal, t_goal = _get_relative_goal_loc(goal_loc, loc, theta)
        rel_goal_loc = np.concatenate((r_goal * np.cos(t_goal),
                                       r_goal * np.sin(t_goal),
                                       np.cos(goal_theta - theta),
                                       np.sin(goal_theta - theta)), axis=1)
        outs['rel_goal_loc'] = np.expand_dims(rel_goal_loc, axis=1)
      elif self.task_params.type[:14] == 'to_nearest_obj':
        rel_goal_loc = np.zeros(
            (self.task_params.batch_size, 1,
             len(self.task_params.semantic_task.class_map_names)),
            dtype=np.float32)
        for i in range(self.task_params.batch_size):
          t = target_class[i]
          rel_goal_loc[i, 0, t] = 1.
        outs['rel_goal_loc'] = rel_goal_loc

    # Location on map to plot the trajectory during validation.
    if self.task_params.outputs.loc_on_map:
      loc, x_axis, y_axis, theta = self.get_loc_axis(
          current_nodes, delta_theta=self.task.delta_theta,
          perturb=perturbs[:, step_number, :])
      outs['loc_on_map'] = np.expand_dims(loc, axis=1)

    # Compute gt_dist to goal.
    if self.task_params.outputs.gt_dist_to_goal:
      gt_dist_to_goal = np.zeros((len(current_node_ids), 1), dtype=np.float32)
      for i, n in enumerate(current_node_ids):
        gt_dist_to_goal[i, 0] = self.episode.dist_to_goal[goal_number][i][n]
      outs['gt_dist_to_goal'] = np.expand_dims(gt_dist_to_goal, axis=1)

    # Free space in front of you, map and goal as images.
    if self.task_params.outputs.ego_maps:
      loc, x_axis, y_axis, theta = self.get_loc_axis(
          current_nodes, delta_theta=self.task.delta_theta,
          perturb=perturbs[:, step_number, :])
      maps = generate_egocentric_maps(self.task.scaled_maps,
                                      self.task_params.map_scales,
                                      self.task_params.map_crop_sizes,
                                      loc, x_axis, y_axis, theta)
      for i in range(len(self.task_params.map_scales)):
        outs['ego_maps_{:d}'.format(i)] = \
            np.expand_dims(np.expand_dims(maps[i], axis=1), axis=-1)

    if self.task_params.outputs.readout_maps:
      loc, x_axis, y_axis, theta = self.get_loc_axis(
          current_nodes, delta_theta=self.task.delta_theta,
          perturb=perturbs[:, step_number, :])
      maps = generate_egocentric_maps(
          self.task.readout_maps_scaled,
          self.task_params.readout_maps_scales,
          self.task_params.readout_maps_crop_sizes,
          loc, x_axis, y_axis, theta)
      for i in range(len(self.task_params.readout_maps_scales)):
        outs['readout_maps_{:d}'.format(i)] = \
            np.expand_dims(np.expand_dims(maps[i], axis=1), axis=-1)

    # Images for the goal.
    if self.task_params.outputs.ego_goal_imgs:
      if self.task_params.type[:14] != 'to_nearest_obj':
        loc, x_axis, y_axis, theta = self.get_loc_axis(
            current_nodes, delta_theta=self.task.delta_theta,
            perturb=perturbs[:, step_number, :])
        goal_loc, _, _, _ = self.get_loc_axis(
            end_nodes, delta_theta=self.task.delta_theta,
            perturb=end_perturbs[:, 0, :])
        rel_goal_orientation = np.mod(
            np.int32(current_nodes[:, 2:] - end_nodes[:, 2:]),
            self.task_params.n_ori)
        goal_dist, goal_theta = _get_relative_goal_loc(goal_loc, loc, theta)
        goals = generate_goal_images(self.task_params.map_scales,
                                     self.task_params.map_crop_sizes,
                                     self.task_params.n_ori, goal_dist,
                                     goal_theta, rel_goal_orientation)
        for i in range(len(self.task_params.map_scales)):
          outs['ego_goal_imgs_{:d}'.format(i)] = np.expand_dims(goals[i],
                                                                axis=1)
      elif self.task_params.type[:14] == 'to_nearest_obj':
        for i in range(len(self.task_params.map_scales)):
          num_classes = len(self.task_params.semantic_task.class_map_names)
          outs['ego_goal_imgs_{:d}'.format(i)] = np.zeros(
              (self.task_params.batch_size, 1,
               self.task_params.map_crop_sizes[i],
               self.task_params.map_crop_sizes[i],
               self.task_params.goal_channels))
        for i in range(self.task_params.batch_size):
          t = target_class[i]
          for j in range(len(self.task_params.map_scales)):
            outs['ego_goal_imgs_{:d}'.format(j)][i, :, :, :, t] = 1.

    # Incremental locs and theta (for map warping), always in the original
    # scale of the map; the subsequent steps in the tf code scale
    # appropriately. Scaling is done by just multiplying incremental_locs
    # appropriately.
    if self.task_params.outputs.egomotion:
      if step_number == 0:
        # Zero ego motion.
        incremental_locs = np.zeros((self.task_params.batch_size, 1, 2),
                                    dtype=np.float32)
        incremental_thetas = np.zeros((self.task_params.batch_size, 1, 1),
                                      dtype=np.float32)
      else:
        previous_nodes = self.task.nodes[
            self.episode.history[:, step_number - 1], :] * 1
        loc, _, _, theta = self.get_loc_axis(
            current_nodes, delta_theta=self.task.delta_theta,
            perturb=perturbs[:, step_number, :])
        previous_loc, _, _, previous_theta = self.get_loc_axis(
            previous_nodes, delta_theta=self.task.delta_theta,
            perturb=perturbs[:, step_number - 1, :])
        incremental_locs_ = np.reshape(loc - previous_loc,
                                       [self.task_params.batch_size, 1, -1])
        t = -np.pi / 2 + np.reshape(theta * 1,
                                    [self.task_params.batch_size, 1, -1])
        incremental_locs = incremental_locs_ * 1
        incremental_locs[:, :, 0] = np.sum(
            incremental_locs_ * np.concatenate((np.cos(t), np.sin(t)),
                                               axis=-1), axis=-1)
        incremental_locs[:, :, 1] = np.sum(
            incremental_locs_ * np.concatenate((np.cos(t + np.pi / 2),
                                                np.sin(t + np.pi / 2)),
                                               axis=-1), axis=-1)
        incremental_thetas = np.reshape(theta - previous_theta,
                                        [self.task_params.batch_size, 1, -1])
      outs['incremental_locs'] = incremental_locs
      outs['incremental_thetas'] = incremental_thetas

    if self.task_params.outputs.visit_count:
      # Output the visit count for this state: how many times the current
      # state has been visited, and how far back in the history the last
      # visit was (excluding this one).
      visit_count = np.zeros((self.task_params.batch_size, 1), dtype=np.int32)
      last_visit = -np.ones((self.task_params.batch_size, 1), dtype=np.int32)
      if step_number >= 1:
        h = self.episode.history[:, :(step_number)]
        visit_count[:, 0] = np.sum(
            h == np.array(current_node_ids).reshape([-1, 1]), axis=1)
        last_visit[:, 0] = np.argmax(
            h[:, ::-1] == np.array(current_node_ids).reshape([-1, 1]),
            axis=1) + 1
        last_visit[visit_count == 0] = -1  # -1 if not visited.
      outs['visit_count'] = np.expand_dims(visit_count, axis=1)
      outs['last_visit'] = np.expand_dims(last_visit, axis=1)
    return outs
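The visit-count bookkeeping above reduces to two vectorized reductions over the episode history. A worked example on a single-element batch (values are illustrative):

import numpy as np

history = np.array([[3, 5, 3, 7]])                 # B x T node ids seen so far
current = np.array([3]).reshape([-1, 1])
visit_count = np.sum(history == current, axis=1)                 # [2] visits
last_visit = np.argmax(history[:, ::-1] == current, axis=1) + 1  # [2] steps ago
last_visit[visit_count == 0] = -1                  # -1 if never visited
print(visit_count, last_visit)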
  def get_features_name(self):
    f = []
    if self.task_params.outputs.images:
      f.append('imgs')
    if self.task_params.outputs.rel_goal_loc:
      f.append('rel_goal_loc')
    if self.task_params.outputs.loc_on_map:
      f.append('loc_on_map')
    if self.task_params.outputs.gt_dist_to_goal:
      f.append('gt_dist_to_goal')
    if self.task_params.outputs.ego_maps:
      for i in range(len(self.task_params.map_scales)):
        f.append('ego_maps_{:d}'.format(i))
    if self.task_params.outputs.readout_maps:
      for i in range(len(self.task_params.readout_maps_scales)):
        f.append('readout_maps_{:d}'.format(i))
    if self.task_params.outputs.ego_goal_imgs:
      for i in range(len(self.task_params.map_scales)):
        f.append('ego_goal_imgs_{:d}'.format(i))
    if self.task_params.outputs.egomotion:
      f.append('incremental_locs')
      f.append('incremental_thetas')
    if self.task_params.outputs.visit_count:
      f.append('visit_count')
      f.append('last_visit')
    if self.task_params.outputs.analytical_counts:
      for i in range(len(self.task_params.analytical_counts.map_sizes)):
        f.append('analytical_counts_{:d}'.format(i))
    if self.task_params.outputs.node_ids:
      f.append('node_ids')
      f.append('perturbs')
    return f
  def pre_features(self, inputs):
    if self.task_params.outputs.images:
      inputs['imgs'] = image_pre(inputs['imgs'], self.task_params.modalities)
    return inputs
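pre_features defers to image_pre for modality-dependent preprocessing. Its exact normalization lives elsewhere in the repo; a hypothetical stand-in that zero-centers rgb renders might look like this:

import numpy as np

def image_pre_sketch(imgs, modalities):
  # Hypothetical: scale uint8 rgb renders to roughly zero-centered floats.
  imgs = imgs.astype(np.float32)
  if 'rgb' in modalities:
    imgs = imgs / 255. - 0.5
  return imgs

x = image_pre_sketch(np.full((1, 1, 1, 8, 8, 3), 128, dtype=np.uint8), ['rgb'])
print(x.min(), x.max())  # ~0.002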
class BuildingMultiplexer():
  def __init__(self, args, task_number):
    params = vars(args)
    for k in params.keys():
      setattr(self, k, params[k])
    self.task_number = task_number
    self._pick_data(task_number)
    logging.info('Env Class: %s.', self.env_class)
    if self.task_params.task == 'planning':
      self._setup_planner()
    elif self.task_params.task == 'mapping':
      self._setup_mapper()
    elif self.task_params.task == 'map+plan':
      self._setup_mapper()
    else:
      # %s-style args, not str.format, so the task name actually gets logged.
      logging.error('Undefined task: %s', self.task_params.task)

  def _pick_data(self, task_number):
    logging.error('Input Building Names: %s', self.building_names)
    # Integer division so the flip flag is computed from a whole number of
    # passes over the building list.
    self.flip = [np.mod(task_number // len(self.building_names), 2) == 1]
    id = np.mod(task_number, len(self.building_names))
    self.building_names = [self.building_names[id]]
    self.task_params.building_seed = task_number
    logging.error('BuildingMultiplexer: Picked Building Name: %s',
                  self.building_names)
    self.building_names = self.building_names[0].split('+')
    self.flip = [self.flip[0] for _ in self.building_names]
    logging.error('BuildingMultiplexer: Picked Building Name: %s',
                  self.building_names)
    logging.error('BuildingMultiplexer: Flipping Buildings: %s', self.flip)
    logging.error('BuildingMultiplexer: Set building_seed: %d',
                  self.task_params.building_seed)
    self.num_buildings = len(self.building_names)
    logging.error('BuildingMultiplexer: Num buildings: %d',
                  self.num_buildings)

  def _setup_planner(self):
    # Load building env class.
    self.buildings = []
    for i, building_name in enumerate(self.building_names):
      b = self.env_class(robot=self.robot, env=self.env,
                         task_params=self.task_params,
                         building_name=building_name, flip=self.flip[i],
                         logdir=self.logdir, building_loader=self.dataset)
      self.buildings.append(b)

  def _setup_mapper(self):
    # Set up the renderer.
    cp = self.camera_param
    rgb_shader, d_shader = sru.get_shaders(cp.modalities)
    r_obj = SwiftshaderRenderer()
    r_obj.init_display(width=cp.width, height=cp.height, fov=cp.fov,
                       z_near=cp.z_near, z_far=cp.z_far,
                       rgb_shader=rgb_shader, d_shader=d_shader)
    self.r_obj = r_obj
    r_obj.clear_scene()

    # Load building env class.
    self.buildings = []
    wt = []
    for i, building_name in enumerate(self.building_names):
      b = self.env_class(robot=self.robot, env=self.env,
                         task_params=self.task_params,
                         building_name=building_name, flip=self.flip[i],
                         logdir=self.logdir, building_loader=self.dataset,
                         r_obj=r_obj)
      wt.append(b.get_weight())
      b.load_building_into_scene()
      b.set_building_visibility(False)
      self.buildings.append(b)
    wt = np.array(wt).astype(np.float32)
    wt = wt / np.sum(wt + 0.0001)
    self.building_sampling_weights = wt

  def sample_building(self, rng):
    if self.num_buildings == 1:
      building_id = rng.choice(range(len(self.building_names)))
    else:
      building_id = rng.choice(self.num_buildings,
                               p=self.building_sampling_weights)
    b = self.buildings[building_id]
    instances = b._gen_rng(rng)
    self._building_id = building_id
    return self.buildings[building_id], instances

  def sample_env(self, rngs):
    rng = rngs[0]
    if self.num_buildings == 1:
      building_id = rng.choice(range(len(self.building_names)))
    else:
      building_id = rng.choice(self.num_buildings,
                               p=self.building_sampling_weights)
    return self.buildings[building_id]

  def pre(self, inputs):
    return self.buildings[self._building_id].pre(inputs)

  def __del__(self):
    self.r_obj.clear_scene()
    logging.error('Clearing scene.')
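During mapping, buildings are sampled proportionally to get_weight(), i.e. their node count. A quick check of the normalized-weight draw used in sample_building (the weights below are made up; the class above also folds a tiny epsilon into the normalization):

import numpy as np

wt = np.array([1200., 800., 400.])   # get_weight() per building
wt = wt / wt.sum()
rng = np.random.RandomState(0)
draws = rng.choice(len(wt), size=1000, p=wt)
print(np.bincount(draws, minlength=3))  # roughly 500 / 333 / 167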
cognitive_mapping_and_planning/datasets/nav_env_config.py
0 → 100644
View file @ 44fa1d37
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Configs for stanford navigation environment.
Base config for stanford navigation enviornment.
"""
import numpy as np
import src.utils as utils
import datasets.nav_env as nav_env
def nav_env_base_config():
  """Returns the base config for stanford navigation environment.

  Returns:
    Base config for stanford navigation environment.
  """
  robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120,
                    camera_elevation_degree=-15)

  env = utils.Foo(padding=10, resolution=5, num_point_threshold=2,
                  valid_min=-10, valid_max=200, n_samples_per_face=200)

  camera_param = utils.Foo(width=225, height=225, z_near=0.05, z_far=20.0,
                           fov=60., modalities=['rgb'], img_channels=3)

  data_augment = utils.Foo(lr_flip=0, delta_angle=0.5, delta_xy=4,
                           relight=True, relight_fast=False,
                           structured=False)  # if True, uses the same perturb
                                              # for the whole episode.

  outputs = utils.Foo(images=True, rel_goal_loc=False, loc_on_map=True,
                      gt_dist_to_goal=True, ego_maps=False,
                      ego_goal_imgs=False, egomotion=False, visit_count=False,
                      analytical_counts=False, node_ids=True,
                      readout_maps=False)

  # class_map_names=['board', 'chair', 'door', 'sofa', 'table']
  class_map_names = ['chair', 'door', 'table']
  semantic_task = utils.Foo(class_map_names=class_map_names, pix_distance=16,
                            sampling='uniform')

  # Time per iteration for cmp is 0.82 seconds per episode with 3.4s overhead
  # per batch.
  task_params = utils.Foo(max_dist=32, step_size=8, num_steps=40,
                          num_actions=4, batch_size=4, building_seed=0,
                          num_goals=1, img_height=None, img_width=None,
                          img_channels=None, modalities=None, outputs=outputs,
                          map_scales=[1.], map_crop_sizes=[64],
                          rel_goal_loc_dim=4, base_class='Building',
                          task='map+plan', n_ori=4, type='room_to_room_many',
                          data_augment=data_augment,
                          room_regex='^((?!hallway).)*$', toy_problem=False,
                          map_channels=1, gt_coverage=False,
                          input_type='maps', full_information=False,
                          aux_delta_thetas=[], semantic_task=semantic_task,
                          num_history_frames=0, node_ids_dim=1,
                          perturbs_dim=4,
                          map_resize_method='linear_noantialiasing',
                          readout_maps_channels=1, readout_maps_scales=[],
                          readout_maps_crop_sizes=[], n_views=1,
                          reward_time_penalty=0.1, reward_at_goal=1.,
                          discount_factor=0.99, rejection_sampling_M=100,
                          min_dist=None)

  navtask_args = utils.Foo(
      building_names=['area1_gates_wingA_floor1_westpart'],
      env_class=nav_env.VisualNavigationEnv, robot=robot,
      task_params=task_params, env=env, camera_param=camera_param,
      cache_rooms=True)
  return navtask_args
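A hypothetical usage sketch: training scripts start from this base config and override fields before constructing the environment (the particular overrides below are illustrative, not from the repo):

args = nav_env_base_config()
args.task_params.batch_size = 8
args.task_params.modalities = args.camera_param.modalities
args.task_params.img_height = args.camera_param.height
args.task_params.img_width = args.camera_param.width
args.task_params.img_channels = args.camera_param.img_channels
print(args.building_names, args.task_params.type)
# ['area1_gates_wingA_floor1_westpart'] room_to_room_many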
cognitive_mapping_and_planning/matplotlibrc
0 → 100644
View file @ 44fa1d37
backend : agg