ModelZoo / ResNet50_tensorflow / Commits

Commit f5fc733a
Authored Feb 03, 2022 by Byzantine
Removing research/community models
Parent: 09bc9f54
Changes: 326 files. Showing 20 changed files with 0 additions and 4239 deletions (+0, -4239):
research/cognitive_planning/preprocessing/lenet_preprocessing.py        +0 -44
research/cognitive_planning/preprocessing/preprocessing_factory.py      +0 -81
research/cognitive_planning/preprocessing/vgg_preprocessing.py          +0 -365
research/cognitive_planning/standard_fields.py                          +0 -224
research/cognitive_planning/string_int_label_map_pb2.py                 +0 -138
research/cognitive_planning/tasks.py                                    +0 -1507
research/cognitive_planning/train_supervised_active_vision.py           +0 -503
research/cognitive_planning/train_supervised_active_vision.sh           +0 -32
research/cognitive_planning/visualization_utils.py                      +0 -733
research/cognitive_planning/viz_active_vision_dataset_main.py           +0 -379
research/compression/README.md                                          +0 -19
research/compression/entropy_coder/README.md                            +0 -109
research/compression/entropy_coder/__init__.py                          +0 -0
research/compression/entropy_coder/all_models/__init__.py               +0 -0
research/compression/entropy_coder/all_models/all_models.py             +0 -19
research/compression/entropy_coder/all_models/all_models_test.py        +0 -68
research/compression/entropy_coder/configs/gru_prime3/model_config.json +0 -4
research/compression/entropy_coder/configs/synthetic/input_config.json  +0 -4
research/compression/entropy_coder/configs/synthetic/model_config.json  +0 -4
research/compression/entropy_coder/configs/synthetic/train_config.json  +0 -6
Too many changes to show: to preserve performance, only 326 of 326+ files are displayed.
research/cognitive_planning/preprocessing/lenet_preprocessing.py (deleted, 100644 → 0)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities for preprocessing."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def preprocess_image(image, output_height, output_width, is_training):
  """Preprocesses the given image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.

  Returns:
    A preprocessed image.
  """
  image = tf.to_float(image)
  image = tf.image.resize_image_with_crop_or_pad(
      image, output_width, output_height)
  image = tf.subtract(image, 128.0)
  image = tf.div(image, 128.0)
  return image
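For context, the net effect of the deleted LeNet preprocessing is a crop-or-pad to the target size followed by scaling pixel values into roughly [-1, 1]. A minimal sketch of that normalization step in plain NumPy (illustrative only; not part of the original file):

import numpy as np

# Stand-in 2x2 image with uint8-range values.
image = np.array([[0.0, 64.0], [128.0, 255.0]], dtype=np.float32)

# Same arithmetic as the tf.subtract/tf.div pair above:
# center at 128, scale by 128 -> values in [-1.0, 0.9921875].
normalized = (image - 128.0) / 128.0
print(normalized)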
research/cognitive_planning/preprocessing/preprocessing_factory.py (deleted, 100644 → 0)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a factory for building various models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from preprocessing import cifarnet_preprocessing
from preprocessing import inception_preprocessing
from preprocessing import lenet_preprocessing
from preprocessing import vgg_preprocessing

slim = tf.contrib.slim


def get_preprocessing(name, is_training=False):
  """Returns preprocessing_fn(image, height, width, **kwargs).

  Args:
    name: The name of the preprocessing function.
    is_training: `True` if the model is being used for training and `False`
      otherwise.

  Returns:
    preprocessing_fn: A function that preprocesses a single image (pre-batch).
      It has the following signature:
        image = preprocessing_fn(image, output_height, output_width, ...).

  Raises:
    ValueError: If Preprocessing `name` is not recognized.
  """
  preprocessing_fn_map = {
      'cifarnet': cifarnet_preprocessing,
      'inception': inception_preprocessing,
      'inception_v1': inception_preprocessing,
      'inception_v2': inception_preprocessing,
      'inception_v3': inception_preprocessing,
      'inception_v4': inception_preprocessing,
      'inception_resnet_v2': inception_preprocessing,
      'lenet': lenet_preprocessing,
      'mobilenet_v1': inception_preprocessing,
      'nasnet_mobile': inception_preprocessing,
      'nasnet_large': inception_preprocessing,
      'pnasnet_large': inception_preprocessing,
      'resnet_v1_50': vgg_preprocessing,
      'resnet_v1_101': vgg_preprocessing,
      'resnet_v1_152': vgg_preprocessing,
      'resnet_v1_200': vgg_preprocessing,
      'resnet_v2_50': vgg_preprocessing,
      'resnet_v2_101': vgg_preprocessing,
      'resnet_v2_152': vgg_preprocessing,
      'resnet_v2_200': vgg_preprocessing,
      'vgg': vgg_preprocessing,
      'vgg_a': vgg_preprocessing,
      'vgg_16': vgg_preprocessing,
      'vgg_19': vgg_preprocessing,
  }

  if name not in preprocessing_fn_map:
    raise ValueError('Preprocessing name [%s] was not recognized' % name)

  def preprocessing_fn(image, output_height, output_width, **kwargs):
    return preprocessing_fn_map[name].preprocess_image(
        image, output_height, output_width, is_training=is_training, **kwargs)

  return preprocessing_fn
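A usage sketch of the deleted factory, assuming a TensorFlow 1.x environment with this preprocessing package on the import path; the model name 'vgg_19' and the 224x224 output size are illustrative choices, not values taken from this commit:

import tensorflow as tf
from preprocessing import preprocessing_factory

# Graph-mode TF 1.x: a placeholder stands in for a decoded image.
image = tf.placeholder(tf.uint8, shape=[None, None, 3])

preprocessing_fn = preprocessing_factory.get_preprocessing(
    'vgg_19', is_training=False)
# Every entry in preprocessing_fn_map shares this signature.
processed_image = preprocessing_fn(image, output_height=224, output_width=224)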
research/cognitive_planning/preprocessing/vgg_preprocessing.py (deleted, 100644 → 0)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images.
The preprocessing steps for VGG were introduced in the following technical
report:
Very Deep Convolutional Networks For Large-Scale Image Recognition
Karen Simonyan and Andrew Zisserman
arXiv technical report, 2015
PDF: http://arxiv.org/pdf/1409.1556.pdf
ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
CC-BY-4.0
More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim

_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512


def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Crops the given image using the provided offsets and sizes.

  Note that the method doesn't assume we know the input image size but it does
  assume we know the input image rank.

  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.

  Returns:
    the cropped (and resized) image.

  Raises:
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  original_shape = tf.shape(image)

  rank_assertion = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  with tf.control_dependencies([rank_assertion]):
    cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]])

  size_assertion = tf.Assert(
      tf.logical_and(
          tf.greater_equal(original_shape[0], crop_height),
          tf.greater_equal(original_shape[1], crop_width)),
      ['Crop size greater than the image size.'])

  offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

  # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
  # define the crop size.
  with tf.control_dependencies([size_assertion]):
    image = tf.slice(image, offsets, cropped_shape)
  return tf.reshape(image, cropped_shape)


def _random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    rank_assertions.append(rank_assert)

  with tf.control_dependencies([rank_assertions[0]]):
    image_shape = tf.shape(image_list[0])
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    with tf.control_dependencies([rank_assertions[i]]):
      shape = tf.shape(image)
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random_uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  with tf.control_dependencies(asserts):
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
  with tf.control_dependencies(asserts):
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  offset_height = tf.random_uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random_uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]


def _central_crop(image_list, crop_height, crop_width):
  """Performs central crops of the given image list.

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the height of the image following the crop.
    crop_width: the width of the image following the crop.

  Returns:
    the list of cropped images.
  """
  outputs = []
  for image in image_list:
    image_height = tf.shape(image)[0]
    image_width = tf.shape(image)[1]

    offset_height = (image_height - crop_height) / 2
    offset_width = (image_width - crop_width) / 2

    outputs.append(_crop(image, offset_height, offset_width,
                         crop_height, crop_width))
  return outputs


def _mean_image_subtraction(image, means):
  """Subtracts the given means from each image channel.

  For example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means)

  Note that the rank of `image` must be known.

  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to subtract from each channel.

  Returns:
    the centered image.

  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  if image.get_shape().ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = image.get_shape().as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')

  channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
  for i in range(num_channels):
    channels[i] -= means[i]
  return tf.concat(axis=2, values=channels)


def _smallest_size_at_least(height, width, smallest_side):
  """Computes new shape with the smallest side equal to `smallest_side`.

  Computes new shape with the smallest side equal to `smallest_side` while
  preserving the original aspect ratio.

  Args:
    height: an int32 scalar tensor indicating the current height.
    width: an int32 scalar tensor indicating the current width.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    new_height: an int32 scalar tensor indicating the new height.
    new_width: an int32 scalar tensor indicating the new width.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  height = tf.to_float(height)
  width = tf.to_float(width)
  smallest_side = tf.to_float(smallest_side)

  scale = tf.cond(tf.greater(height, width),
                  lambda: smallest_side / width,
                  lambda: smallest_side / height)
  new_height = tf.to_int32(tf.rint(height * scale))
  new_width = tf.to_int32(tf.rint(width * scale))
  return new_height, new_width


def _aspect_preserving_resize(image, smallest_side):
  """Resize images preserving the original aspect ratio.

  Args:
    image: A 3-D image `Tensor`.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    resized_image: A 3-D tensor containing the resized image.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  shape = tf.shape(image)
  height = shape[0]
  width = shape[1]
  new_height, new_width = _smallest_size_at_least(height, width, smallest_side)
  image = tf.expand_dims(image, 0)
  resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
                                           align_corners=False)
  resized_image = tf.squeeze(resized_image)
  resized_image.set_shape([None, None, 3])
  return resized_image


def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX):
  """Preprocesses the given image for training.

  Note that the actual resizing scale is sampled from
  [`resize_size_min`, `resize_size_max`].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing.

  Returns:
    A preprocessed image.
  """
  resize_side = tf.random_uniform(
      [], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32)

  image = _aspect_preserving_resize(image, resize_side)
  image = _random_crop([image], output_height, output_width)[0]
  image.set_shape([output_height, output_width, 3])
  image = tf.to_float(image)
  image = tf.image.random_flip_left_right(image)
  return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])


def preprocess_for_eval(image, output_height, output_width, resize_side):
  """Preprocesses the given image for evaluation.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side: The smallest side of the image for aspect-preserving resizing.

  Returns:
    A preprocessed image.
  """
  image = _aspect_preserving_resize(image, resize_side)
  image = _central_crop([image], output_height, output_width)[0]
  image.set_shape([output_height, output_width, 3])
  image = tf.to_float(image)
  return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])


def preprocess_image(image, output_height, output_width, is_training=False,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX):
  """Preprocesses the given image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, then this value
      is used for rescaling.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, this value is
      ignored. Otherwise, the resize side is sampled from
      [resize_size_min, resize_size_max].

  Returns:
    A preprocessed image.
  """
  if is_training:
    return preprocess_for_train(image, output_height, output_width,
                                resize_side_min, resize_side_max)
  else:
    return preprocess_for_eval(image, output_height, output_width,
                               resize_side_min)
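In eval mode the pipeline above is deterministic: aspect-preserving resize of the short side to resize_side, central crop, cast to float, and per-channel mean subtraction. A NumPy sketch of the mean-subtraction step only, using the constants defined in this file (illustrative, not from the original source):

import numpy as np

_R_MEAN, _G_MEAN, _B_MEAN = 123.68, 116.78, 103.94

# A stand-in 2x2 RGB crop with every pixel at mid-gray.
image = np.full((2, 2, 3), 128.0, dtype=np.float32)

# Equivalent of _mean_image_subtraction: subtract one mean per channel.
centered = image - np.array([_R_MEAN, _G_MEAN, _B_MEAN], dtype=np.float32)
print(centered[0, 0])  # approximately [4.32 11.22 24.06]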
research/cognitive_planning/standard_fields.py (deleted, 100644 → 0)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains classes specifying naming conventions used for object detection.
Specifies:
InputDataFields: standard fields used by reader/preprocessor/batcher.
DetectionResultFields: standard fields returned by object detector.
BoxListFields: standard fields used by BoxList.
TfExampleFields: standard fields for tf-example data format (go/tf-example).
"""
class InputDataFields(object):
  """Names for the input tensors.

  Holds the standard data field names to use for identifying input tensors.
  This should be used by the decoder to identify keys for the returned
  tensor_dict containing input tensors. And it should be used by the model to
  identify the tensors it needs.

  Attributes:
    image: image.
    image_additional_channels: additional channels.
    original_image: image in the original input size.
    key: unique key corresponding to image.
    source_id: source of the original image.
    filename: original filename of the dataset (without common path).
    groundtruth_image_classes: image-level class labels.
    groundtruth_boxes: coordinates of the ground truth boxes in the image.
    groundtruth_classes: box-level class labels.
    groundtruth_label_types: box-level label types (e.g. explicit negative).
    groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
      is the groundtruth a single object or a crowd.
    groundtruth_area: area of a groundtruth segment.
    groundtruth_difficult: is a `difficult` object
    groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
      same class, forming a connected group, where instances are heavily
      occluding each other.
    proposal_boxes: coordinates of object proposal boxes.
    proposal_objectness: objectness score of each proposal.
    groundtruth_instance_masks: ground truth instance masks.
    groundtruth_instance_boundaries: ground truth instance boundaries.
    groundtruth_instance_classes: instance mask-level class labels.
    groundtruth_keypoints: ground truth keypoints.
    groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
    groundtruth_label_scores: groundtruth label scores.
    groundtruth_weights: groundtruth weight factor for bounding boxes.
    num_groundtruth_boxes: number of groundtruth boxes.
    true_image_shape: true shapes of images in the resized images, as resized
      images can be padded with zeros.
    multiclass_scores: the label score per class for each box.
  """
  image = 'image'
  image_additional_channels = 'image_additional_channels'
  original_image = 'original_image'
  key = 'key'
  source_id = 'source_id'
  filename = 'filename'
  groundtruth_image_classes = 'groundtruth_image_classes'
  groundtruth_boxes = 'groundtruth_boxes'
  groundtruth_classes = 'groundtruth_classes'
  groundtruth_label_types = 'groundtruth_label_types'
  groundtruth_is_crowd = 'groundtruth_is_crowd'
  groundtruth_area = 'groundtruth_area'
  groundtruth_difficult = 'groundtruth_difficult'
  groundtruth_group_of = 'groundtruth_group_of'
  proposal_boxes = 'proposal_boxes'
  proposal_objectness = 'proposal_objectness'
  groundtruth_instance_masks = 'groundtruth_instance_masks'
  groundtruth_instance_boundaries = 'groundtruth_instance_boundaries'
  groundtruth_instance_classes = 'groundtruth_instance_classes'
  groundtruth_keypoints = 'groundtruth_keypoints'
  groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
  groundtruth_label_scores = 'groundtruth_label_scores'
  groundtruth_weights = 'groundtruth_weights'
  num_groundtruth_boxes = 'num_groundtruth_boxes'
  true_image_shape = 'true_image_shape'
  multiclass_scores = 'multiclass_scores'


class DetectionResultFields(object):
  """Naming conventions for storing the output of the detector.

  Attributes:
    source_id: source of the original image.
    key: unique key corresponding to image.
    detection_boxes: coordinates of the detection boxes in the image.
    detection_scores: detection scores for the detection boxes in the image.
    detection_classes: detection-level class labels.
    detection_masks: contains a segmentation mask for each detection box.
    detection_boundaries: contains an object boundary for each detection box.
    detection_keypoints: contains detection keypoints for each detection box.
    num_detections: number of detections in the batch.
  """
  source_id = 'source_id'
  key = 'key'
  detection_boxes = 'detection_boxes'
  detection_scores = 'detection_scores'
  detection_classes = 'detection_classes'
  detection_masks = 'detection_masks'
  detection_boundaries = 'detection_boundaries'
  detection_keypoints = 'detection_keypoints'
  num_detections = 'num_detections'


class BoxListFields(object):
  """Naming conventions for BoxLists.

  Attributes:
    boxes: bounding box coordinates.
    classes: classes per bounding box.
    scores: scores per bounding box.
    weights: sample weights per bounding box.
    objectness: objectness score per bounding box.
    masks: masks per bounding box.
    boundaries: boundaries per bounding box.
    keypoints: keypoints per bounding box.
    keypoint_heatmaps: keypoint heatmaps per bounding box.
    is_crowd: is_crowd annotation per bounding box.
  """
  boxes = 'boxes'
  classes = 'classes'
  scores = 'scores'
  weights = 'weights'
  objectness = 'objectness'
  masks = 'masks'
  boundaries = 'boundaries'
  keypoints = 'keypoints'
  keypoint_heatmaps = 'keypoint_heatmaps'
  is_crowd = 'is_crowd'


class TfExampleFields(object):
  """TF-example proto feature names for object detection.

  Holds the standard feature names to load from an Example proto for object
  detection.

  Attributes:
    image_encoded: JPEG encoded string
    image_format: image format, e.g. "JPEG"
    filename: filename
    channels: number of channels of image
    colorspace: colorspace, e.g. "RGB"
    height: height of image in pixels, e.g. 462
    width: width of image in pixels, e.g. 581
    source_id: original source of the image
    image_class_text: image-level label in text format
    image_class_label: image-level label in numerical format
    object_class_text: labels in text format, e.g. ["person", "cat"]
    object_class_label: labels in numbers, e.g. [16, 8]
    object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
    object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
    object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
    object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
    object_view: viewpoint of object, e.g. ["frontal", "left"]
    object_truncated: is object truncated, e.g. [true, false]
    object_occluded: is object occluded, e.g. [true, false]
    object_difficult: is object difficult, e.g. [true, false]
    object_group_of: is object a single object or a group of objects
    object_depiction: is object a depiction
    object_is_crowd: [DEPRECATED, use object_group_of instead]
      is the object a single object or a crowd
    object_segment_area: the area of the segment.
    object_weight: a weight factor for the object's bounding box.
    instance_masks: instance segmentation masks.
    instance_boundaries: instance boundaries.
    instance_classes: Classes for each instance segmentation mask.
    detection_class_label: class label in numbers.
    detection_bbox_ymin: ymin coordinates of a detection box.
    detection_bbox_xmin: xmin coordinates of a detection box.
    detection_bbox_ymax: ymax coordinates of a detection box.
    detection_bbox_xmax: xmax coordinates of a detection box.
    detection_score: detection score for the class label and box.
  """
  image_encoded = 'image/encoded'
  image_format = 'image/format'  # format is reserved keyword
  filename = 'image/filename'
  channels = 'image/channels'
  colorspace = 'image/colorspace'
  height = 'image/height'
  width = 'image/width'
  source_id = 'image/source_id'
  image_class_text = 'image/class/text'
  image_class_label = 'image/class/label'
  object_class_text = 'image/object/class/text'
  object_class_label = 'image/object/class/label'
  object_bbox_ymin = 'image/object/bbox/ymin'
  object_bbox_xmin = 'image/object/bbox/xmin'
  object_bbox_ymax = 'image/object/bbox/ymax'
  object_bbox_xmax = 'image/object/bbox/xmax'
  object_view = 'image/object/view'
  object_truncated = 'image/object/truncated'
  object_occluded = 'image/object/occluded'
  object_difficult = 'image/object/difficult'
  object_group_of = 'image/object/group_of'
  object_depiction = 'image/object/depiction'
  object_is_crowd = 'image/object/is_crowd'
  object_segment_area = 'image/object/segment/area'
  object_weight = 'image/object/weight'
  instance_masks = 'image/segmentation/object'
  instance_boundaries = 'image/boundaries/object'
  instance_classes = 'image/segmentation/object/class'
  detection_class_label = 'image/detection/label'
  detection_bbox_ymin = 'image/detection/bbox/ymin'
  detection_bbox_xmin = 'image/detection/bbox/xmin'
  detection_bbox_ymax = 'image/detection/bbox/ymax'
  detection_bbox_xmax = 'image/detection/bbox/xmax'
  detection_score = 'image/detection/score'
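These classes are plain namespaces of canonical key strings; decoders and models index a shared tensor_dict through them rather than through raw literals. A minimal sketch of that pattern (the dictionary contents are hypothetical):

# Hypothetical tensor_dict, keyed with the constants defined above.
tensor_dict = {
    InputDataFields.image: 'decoded_image_tensor',
    InputDataFields.groundtruth_boxes: 'boxes_tensor',
}

# The constants are ordinary strings, so lookups stay explicit and typo-safe.
assert InputDataFields.image == 'image'
print(tensor_dict[InputDataFields.groundtruth_boxes])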
research/cognitive_planning/string_int_label_map_pb2.py (deleted, 100644 → 0)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: object_detection/protos/string_int_label_map.proto
import sys
_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor.FileDescriptor(
  name='object_detection/protos/string_int_label_map.proto',
  package='object_detection.protos',
  syntax='proto2',
  serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem')
)


_STRINGINTLABELMAPITEM = _descriptor.Descriptor(
  name='StringIntLabelMapItem',
  full_name='object_detection.protos.StringIntLabelMapItem',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    _descriptor.FieldDescriptor(
      name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1,
      number=2, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    _descriptor.FieldDescriptor(
      name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  options=None,
  is_extendable=False,
  syntax='proto2',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=79,
  serialized_end=150,
)


_STRINGINTLABELMAP = _descriptor.Descriptor(
  name='StringIntLabelMap',
  full_name='object_detection.protos.StringIntLabelMap',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  options=None,
  is_extendable=False,
  syntax='proto2',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=152,
  serialized_end=233,
)

_STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM
DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM
DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict(
  DESCRIPTOR = _STRINGINTLABELMAPITEM,
  __module__ = 'object_detection.protos.string_int_label_map_pb2'
  # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem)
  ))
_sym_db.RegisterMessage(StringIntLabelMapItem)

StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict(
  DESCRIPTOR = _STRINGINTLABELMAP,
  __module__ = 'object_detection.protos.string_int_label_map_pb2'
  # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap)
  ))
_sym_db.RegisterMessage(StringIntLabelMap)


# @@protoc_insertion_point(module_scope)
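The generated message classes are usually populated from a human-readable label map with google.protobuf.text_format; a sketch, assuming this module is importable as string_int_label_map_pb2 (the two-entry label map is made up):

from google.protobuf import text_format

label_map_text = """
item { name: 'cat' id: 1 display_name: 'cat' }
item { name: 'dog' id: 2 display_name: 'dog' }
"""

label_map = StringIntLabelMap()
text_format.Merge(label_map_text, label_map)
print([(item.name, item.id) for item in label_map.item])
# [('cat', 1), ('dog', 2)]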
research/cognitive_planning/tasks.py (deleted, 100644 → 0)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A library of tasks.
This interface is intended to implement a wide variety of navigation
tasks. See go/navigation_tasks for a list.
"""
import
abc
import
collections
import
math
import
threading
import
networkx
as
nx
import
numpy
as
np
import
tensorflow
as
tf
#from pyglib import logging
#import gin
from
envs
import
task_env
from
envs
import
util
as
envs_util
# Utility functions.
def
_pad_or_clip_array
(
np_arr
,
arr_len
,
is_front_clip
=
True
,
output_mask
=
False
):
"""Make np_arr array to have length arr_len.
If the array is shorter than arr_len, then it is padded from the front with
zeros. If it is longer, then it is clipped either from the back or from the
front. Only the first dimension is modified.
Args:
np_arr: numpy array.
arr_len: integer scalar.
is_front_clip: a boolean. If true then clipping is done in the front,
otherwise in the back.
output_mask: If True, outputs a numpy array of rank 1 which represents
a mask of which values have been added (0 - added, 1 - actual output).
Returns:
A numpy array and the size of padding (as a python int32). This size is
negative is the array is clipped.
"""
shape
=
list
(
np_arr
.
shape
)
pad_size
=
arr_len
-
shape
[
0
]
padded_or_clipped
=
None
if
pad_size
<
0
:
if
is_front_clip
:
padded_or_clipped
=
np_arr
[
-
pad_size
:,
:]
else
:
padded_or_clipped
=
np_arr
[:
arr_len
,
:]
elif
pad_size
>
0
:
padding
=
np
.
zeros
([
pad_size
]
+
shape
[
1
:],
dtype
=
np_arr
.
dtype
)
padded_or_clipped
=
np
.
concatenate
([
np_arr
,
padding
],
axis
=
0
)
else
:
padded_or_clipped
=
np_arr
if
output_mask
:
mask
=
np
.
ones
((
arr_len
,),
dtype
=
np
.
int
)
if
pad_size
>
0
:
mask
[
-
pad_size
:]
=
0
return
padded_or_clipped
,
pad_size
,
mask
else
:
return
padded_or_clipped
,
pad_size
def
classification_loss
(
truth
,
predicted
,
weights
=
None
,
is_one_hot
=
True
):
"""A cross entropy loss.
Computes the mean of cross entropy losses for all pairs of true labels and
predictions. It wraps around a tf implementation of the cross entropy loss
with additional reformating of the inputs. If the truth and predicted are
n-rank Tensors with n > 2, then these are reshaped to 2-rank Tensors. It
allows for truth to be specified as one hot vector or class indices. Finally,
a weight can be specified for each element in truth and predicted.
Args:
truth: an n-rank or (n-1)-rank Tensor containing labels. If is_one_hot is
True, then n-rank Tensor is expected, otherwise (n-1) rank one.
predicted: an n-rank float Tensor containing prediction probabilities.
weights: an (n-1)-rank float Tensor of weights
is_one_hot: a boolean.
Returns:
A TF float scalar.
"""
num_labels
=
predicted
.
get_shape
().
as_list
()[
-
1
]
if
not
is_one_hot
:
truth
=
tf
.
reshape
(
truth
,
[
-
1
])
truth
=
tf
.
one_hot
(
truth
,
depth
=
num_labels
,
on_value
=
1.0
,
off_value
=
0.0
,
axis
=-
1
)
else
:
truth
=
tf
.
reshape
(
truth
,
[
-
1
,
num_labels
])
predicted
=
tf
.
reshape
(
predicted
,
[
-
1
,
num_labels
])
losses
=
tf
.
nn
.
softmax_cross_entropy_with_logits
(
labels
=
truth
,
logits
=
predicted
)
if
weights
is
not
None
:
losses
=
tf
.
boolean_mask
(
losses
,
tf
.
cast
(
tf
.
reshape
(
weights
,
[
-
1
]),
dtype
=
tf
.
bool
))
return
tf
.
reduce_mean
(
losses
)
class
UnrolledTaskIOConfig
(
object
):
"""Configuration of task inputs and outputs.
A task can have multiple inputs, which define the context, and a task query
which defines what is to be executed in this context. The desired execution
is encoded in an output. The config defines the shapes of the inputs, the
query and the outputs.
"""
def
__init__
(
self
,
inputs
,
output
,
query
=
None
):
"""Constructs a Task input/output config.
Args:
inputs: a list of tuples. Each tuple represents the configuration of an
input, with first element being the type (a string value) and the second
element the shape.
output: a tuple representing the configuration of the output.
query: a tuple representing the configuration of the query. If no query,
then None.
"""
# A configuration of a single input, output or query. Consists of the type,
# which can be one of the three specified above, and a shape. The shape must
# be consistent with the type, e.g. if type == 'image', then shape is a 3
# valued list.
io_config
=
collections
.
namedtuple
(
'IOConfig'
,
[
'type'
,
'shape'
])
def
assert_config
(
config
):
if
not
isinstance
(
config
,
tuple
):
raise
ValueError
(
'config must be a tuple. Received {}'
.
format
(
type
(
config
)))
if
len
(
config
)
!=
2
:
raise
ValueError
(
'config must have 2 elements, has %d'
%
len
(
config
))
if
not
isinstance
(
config
[
0
],
tf
.
DType
):
raise
ValueError
(
'First element of config must be a tf.DType.'
)
if
not
isinstance
(
config
[
1
],
list
):
raise
ValueError
(
'Second element of config must be a list.'
)
assert
isinstance
(
inputs
,
collections
.
OrderedDict
)
for
modality_type
in
inputs
:
assert_config
(
inputs
[
modality_type
])
self
.
_inputs
=
collections
.
OrderedDict
(
[(
k
,
io_config
(
*
value
))
for
k
,
value
in
inputs
.
iteritems
()])
if
query
is
not
None
:
assert_config
(
query
)
self
.
_query
=
io_config
(
*
query
)
else
:
self
.
_query
=
None
assert_config
(
output
)
self
.
_output
=
io_config
(
*
output
)
@
property
def
inputs
(
self
):
return
self
.
_inputs
@
property
def
output
(
self
):
return
self
.
_output
@
property
def
query
(
self
):
return
self
.
_query
class
UnrolledTask
(
object
):
"""An interface for a Task which can be unrolled during training.
Each example is called episode and consists of inputs and target output, where
the output can be considered as desired unrolled sequence of actions for the
inputs. For the specified tasks, these action sequences are to be
unambiguously definable.
"""
__metaclass__
=
abc
.
ABCMeta
def
__init__
(
self
,
config
):
assert
isinstance
(
config
,
UnrolledTaskIOConfig
)
self
.
_config
=
config
# A dict of bookkeeping variables.
self
.
info
=
{}
# Tensorflow input is multithreaded and this lock is needed to prevent
# race condition in the environment. Without the lock, non-thread safe
# environments crash.
self
.
_lock
=
threading
.
Lock
()
@
property
def
config
(
self
):
return
self
.
_config
@
abc
.
abstractmethod
def
episode
(
self
):
"""Returns data needed to train and test a single episode.
Each episode consists of inputs, which define the context of the task, a
query which defines the task, and a target output, which defines a
sequence of actions to be executed for this query. This sequence should not
require feedback, i.e. can be predicted purely from input and query.]
Returns:
inputs, query, output, where inputs is a list of numpy arrays and query
and output are numpy arrays. These arrays must be of shape and type as
specified in the task configuration.
"""
pass
def
reset
(
self
,
observation
):
"""Called after the environment is reset."""
pass
def
episode_batch
(
self
,
batch_size
):
"""Returns a batch of episodes.
Args:
batch_size: size of batch.
Returns:
(inputs, query, output, masks) where inputs is list of numpy arrays and
query, output, and mask are numpy arrays. These arrays must be of shape
and type as specified in the task configuration with one additional
preceding dimension corresponding to the batch.
Raises:
ValueError: if self.episode() returns illegal values.
"""
batched_inputs
=
collections
.
OrderedDict
(
[[
mtype
,
[]]
for
mtype
in
self
.
config
.
inputs
])
batched_queries
=
[]
batched_outputs
=
[]
batched_masks
=
[]
for
_
in
range
(
int
(
batch_size
)):
with
self
.
_lock
:
# The episode function needs to be thread-safe. Since the current
# implementation for the envs are not thread safe we need to have lock
# the operations here.
inputs
,
query
,
outputs
=
self
.
episode
()
if
not
isinstance
(
outputs
,
tuple
):
raise
ValueError
(
'Outputs return value must be tuple.'
)
if
len
(
outputs
)
!=
2
:
raise
ValueError
(
'Output tuple must be of size 2.'
)
if
inputs
is
not
None
:
for
modality_type
in
batched_inputs
:
batched_inputs
[
modality_type
].
append
(
np
.
expand_dims
(
inputs
[
modality_type
],
axis
=
0
))
if
query
is
not
None
:
batched_queries
.
append
(
np
.
expand_dims
(
query
,
axis
=
0
))
batched_outputs
.
append
(
np
.
expand_dims
(
outputs
[
0
],
axis
=
0
))
if
outputs
[
1
]
is
not
None
:
batched_masks
.
append
(
np
.
expand_dims
(
outputs
[
1
],
axis
=
0
))
batched_inputs
=
{
k
:
np
.
concatenate
(
i
,
axis
=
0
)
for
k
,
i
in
batched_inputs
.
iteritems
()
}
if
batched_queries
:
batched_queries
=
np
.
concatenate
(
batched_queries
,
axis
=
0
)
batched_outputs
=
np
.
concatenate
(
batched_outputs
,
axis
=
0
)
if
batched_masks
:
batched_masks
=
np
.
concatenate
(
batched_masks
,
axis
=
0
).
astype
(
np
.
float32
)
else
:
# When the array is empty, the default np.dtype is float64 which causes
# py_func to crash in the tests.
batched_masks
=
np
.
array
([],
dtype
=
np
.
float32
)
batched_inputs
=
[
batched_inputs
[
k
]
for
k
in
self
.
_config
.
inputs
]
return
batched_inputs
,
batched_queries
,
batched_outputs
,
batched_masks
def
tf_episode_batch
(
self
,
batch_size
):
"""A batch of episodes as TF Tensors.
Same as episode_batch with the difference that the return values are TF
Tensors.
Args:
batch_size: a python float for the batch size.
Returns:
inputs, query, output, mask where inputs is a dictionary of tf.Tensor
where the keys are the modality types specified in the config.inputs.
query, output, and mask are TF Tensors. These tensors must
be of shape and type as specified in the task configuration with one
additional preceding dimension corresponding to the batch. Both mask and
output have the same shape as output.
"""
# Define TF outputs.
touts
=
[]
shapes
=
[]
for
_
,
i
in
self
.
_config
.
inputs
.
iteritems
():
touts
.
append
(
i
.
type
)
shapes
.
append
(
i
.
shape
)
if
self
.
_config
.
query
is
not
None
:
touts
.
append
(
self
.
_config
.
query
.
type
)
shapes
.
append
(
self
.
_config
.
query
.
shape
)
# Shapes and types for batched_outputs.
touts
.
append
(
self
.
_config
.
output
.
type
)
shapes
.
append
(
self
.
_config
.
output
.
shape
)
# Shapes and types for batched_masks.
touts
.
append
(
self
.
_config
.
output
.
type
)
shapes
.
append
(
self
.
_config
.
output
.
shape
[
0
:
1
])
def
episode_batch_func
():
if
self
.
config
.
query
is
None
:
inp
,
_
,
output
,
masks
=
self
.
episode_batch
(
int
(
batch_size
))
return
tuple
(
inp
)
+
(
output
,
masks
)
else
:
inp
,
query
,
output
,
masks
=
self
.
episode_batch
(
int
(
batch_size
))
return
tuple
(
inp
)
+
(
query
,
output
,
masks
)
tf_episode_batch
=
tf
.
py_func
(
episode_batch_func
,
[],
touts
,
stateful
=
True
,
name
=
'taskdata'
)
for
episode
,
shape
in
zip
(
tf_episode_batch
,
shapes
):
episode
.
set_shape
([
batch_size
]
+
shape
)
tf_episode_batch_dict
=
collections
.
OrderedDict
([
(
mtype
,
episode
)
for
mtype
,
episode
in
zip
(
self
.
config
.
inputs
.
keys
(),
tf_episode_batch
)
])
cur_index
=
len
(
self
.
config
.
inputs
.
keys
())
tf_query
=
None
if
self
.
config
.
query
is
not
None
:
tf_query
=
tf_episode_batch
[
cur_index
]
cur_index
+=
1
tf_outputs
=
tf_episode_batch
[
cur_index
]
tf_masks
=
tf_episode_batch
[
cur_index
+
1
]
return
tf_episode_batch_dict
,
tf_query
,
tf_outputs
,
tf_masks
@
abc
.
abstractmethod
def
target_loss
(
self
,
true_targets
,
targets
,
weights
=
None
):
"""A loss for training a task model.
This loss measures the discrepancy between the task outputs, the true and
predicted ones.
Args:
true_targets: tf.Tensor of shape and type as defined in the task config
containing the true outputs.
targets: tf.Tensor of shape and type as defined in the task config
containing the predicted outputs.
weights: a bool tf.Tensor of shape as targets. Only true values are
considered when formulating the loss.
"""
pass
def
reward
(
self
,
obs
,
done
,
info
):
"""Returns a reward.
The tasks has to compute a reward based on the state of the environment. The
reward computation, though, is task specific. The task is to use the
environment interface, as defined in task_env.py, to compute the reward. If
this interface does not expose enough information, it is to be updated.
Args:
obs: Observation from environment's step function.
done: Done flag from environment's step function.
info: Info dict from environment's step function.
Returns:
obs: Observation.
reward: Floating point value.
done: Done flag.
info: Info dict.
"""
# Default implementation does not do anything.
return
obs
,
0.0
,
done
,
info
class
RandomExplorationBasedTask
(
UnrolledTask
):
"""A Task which starts with a random exploration of the environment."""
def
__init__
(
self
,
env
,
seed
,
add_query_noise
=
False
,
query_noise_var
=
0.0
,
*
args
,
**
kwargs
):
# pylint: disable=keyword-arg-before-vararg
"""Initializes a Task using a random exploration runs.
Args:
env: an instance of type TaskEnv and gym.Env.
seed: a random seed.
add_query_noise: boolean, if True then whatever queries are generated,
they are randomly perturbed. The semantics of the queries depends on the
concrete task implementation.
query_noise_var: float, the variance of Gaussian noise used for query
perturbation. Used iff add_query_noise==True.
*args: see super class.
**kwargs: see super class.
"""
super
(
RandomExplorationBasedTask
,
self
).
__init__
(
*
args
,
**
kwargs
)
assert
isinstance
(
env
,
task_env
.
TaskEnv
)
self
.
_env
=
env
self
.
_env
.
set_task
(
self
)
self
.
_rng
=
np
.
random
.
RandomState
(
seed
)
self
.
_add_query_noise
=
add_query_noise
self
.
_query_noise_var
=
query_noise_var
# GoToStaticXTask can also take empty config but for the rest of the classes
# the number of modality types is 1.
if
len
(
self
.
config
.
inputs
.
keys
())
>
1
:
raise
NotImplementedError
(
'current implementation supports input '
'with only one modality type or less.'
)
def
_exploration
(
self
):
"""Generates a random exploration run.
The function uses the environment to generate a run.
Returns:
A tuple of numpy arrays. The i-th array contains observation of type and
shape as specified in config.inputs[i].
A list of states along the exploration path.
A list of vertex indices corresponding to the path of the exploration.
"""
in_seq_len
=
self
.
_config
.
inputs
.
values
()[
0
].
shape
[
0
]
path
,
_
,
states
,
step_outputs
=
self
.
_env
.
random_step_sequence
(
min_len
=
in_seq_len
)
obs
=
{
modality_type
:
[]
for
modality_type
in
self
.
_config
.
inputs
}
for
o
in
step_outputs
:
step_obs
,
_
,
done
,
_
=
o
# It is expected that each value of step_obs is a dict of observations,
# whose dimensions are consistent with the config.inputs sizes.
for
modality_type
in
self
.
_config
.
inputs
:
assert
modality_type
in
step_obs
,
'{}'
.
format
(
type
(
step_obs
))
o
=
step_obs
[
modality_type
]
i
=
self
.
_config
.
inputs
[
modality_type
]
assert
len
(
o
.
shape
)
==
len
(
i
.
shape
)
-
1
for
dim_o
,
dim_i
in
zip
(
o
.
shape
,
i
.
shape
[
1
:]):
assert
dim_o
==
dim_i
,
'{} != {}'
.
format
(
dim_o
,
dim_i
)
obs
[
modality_type
].
append
(
o
)
if
done
:
break
if
not
obs
:
return
obs
,
states
,
path
max_path_len
=
int
(
round
(
in_seq_len
*
float
(
len
(
path
))
/
float
(
len
(
obs
.
values
()[
0
]))))
path
=
path
[
-
max_path_len
:]
states
=
states
[
-
in_seq_len
:]
# The above obs is a list of tuples of np,array. Re-format them as tuple of
# np.array, each array containing all observations from all steps.
def
regroup
(
obs
,
i
):
"""Regroups observations.
Args:
obs: a list of tuples of same size. The k-th tuple contains all the
observations from k-th step. Each observation is a numpy array.
i: the index of the observation in each tuple to be grouped.
Returns:
A numpy array of shape config.inputs[i] which contains all i-th
observations from all steps. These are concatenated along the first
dimension. In addition, if the number of observations is different from
the one specified in config.inputs[i].shape[0], then the array is either
padded from front or clipped.
"""
grouped_obs
=
np
.
concatenate
(
[
np
.
expand_dims
(
o
,
axis
=
0
)
for
o
in
obs
[
i
]],
axis
=
0
)
in_seq_len
=
self
.
_config
.
inputs
[
i
].
shape
[
0
]
# pylint: disable=unbalanced-tuple-unpacking
grouped_obs
,
_
=
_pad_or_clip_array
(
grouped_obs
,
in_seq_len
,
is_front_clip
=
True
)
return
grouped_obs
all_obs
=
{
i
:
regroup
(
obs
,
i
)
for
i
in
self
.
_config
.
inputs
}
return
all_obs
,
states
,
path
def
_obs_to_state
(
self
,
path
,
states
):
"""Computes mapping between path nodes and states."""
# Generate a numpy array of locations corresponding to the path vertices.
path_coordinates
=
map
(
self
.
_env
.
vertex_to_pose
,
path
)
path_coordinates
=
np
.
concatenate
(
[
np
.
reshape
(
p
,
[
1
,
2
])
for
p
in
path_coordinates
])
# The observations are taken along a smoothed trajectory following the path.
# We compute a mapping between the obeservations and the map vertices.
path_to_obs
=
collections
.
defaultdict
(
list
)
obs_to_state
=
[]
for
i
,
s
in
enumerate
(
states
):
location
=
np
.
reshape
(
s
[
0
:
2
],
[
1
,
2
])
index
=
np
.
argmin
(
np
.
reshape
(
np
.
sum
(
np
.
power
(
path_coordinates
-
location
,
2
),
axis
=
1
),
[
-
1
]))
index
=
path
[
index
]
path_to_obs
[
index
].
append
(
i
)
obs_to_state
.
append
(
index
)
return
path_to_obs
,
obs_to_state
def
_perturb_state
(
self
,
state
,
noise_var
):
"""Perturbes the state.
The location are purturbed using a Gaussian noise with variance
noise_var. The orientation is uniformly sampled.
Args:
state: a numpy array containing an env state (x, y locations).
noise_var: float
Returns:
The perturbed state.
"""
def
normal
(
v
,
std
):
if
std
>
0
:
n
=
self
.
_rng
.
normal
(
0.0
,
std
)
n
=
min
(
n
,
2.0
*
std
)
n
=
max
(
n
,
-
2.0
*
std
)
return
v
+
n
else
:
return
v
state
=
state
.
copy
()
state
[
0
]
=
normal
(
state
[
0
],
noise_var
)
state
[
1
]
=
normal
(
state
[
1
],
noise_var
)
if
state
.
size
>
2
:
state
[
2
]
=
self
.
_rng
.
uniform
(
-
math
.
pi
,
math
.
pi
)
return
state
def
_sample_obs
(
self
,
indices
,
observations
,
observation_states
,
path_to_obs
,
max_obs_index
=
None
,
use_exploration_obs
=
True
):
"""Samples one observation which corresponds to vertex_index in path.
In addition, the sampled observation must have index in observations less
than max_obs_index. If these two conditions cannot be satisfied the
function returns None.
Args:
indices: a list of integers.
observations: a list of numpy arrays containing all the observations.
observation_states: a list of numpy arrays, each array representing the
state of the observation.
path_to_obs: a dict of path indices to lists of observation indices.
max_obs_index: an integer.
use_exploration_obs: if True, then the observation is sampled among the
specified observations, otherwise it is obtained from the environment.
Returns:
A tuple of:
-- A numpy array of size width x height x 3 representing the sampled
observation.
-- The index of the sampld observation among the input observations.
-- The state at which the observation is captured.
Raises:
ValueError: if the observation and observation_states lists are of
different lengths.
"""
if
len
(
observations
)
!=
len
(
observation_states
):
raise
ValueError
(
'observation and observation_states lists must have '
'equal lengths'
)
if
not
indices
:
return
None
,
None
,
None
vertex_index
=
self
.
_rng
.
choice
(
indices
)
if
use_exploration_obs
:
obs_indices
=
path_to_obs
[
vertex_index
]
if
max_obs_index
is
not
None
:
obs_indices
=
[
i
for
i
in
obs_indices
if
i
<
max_obs_index
]
if
obs_indices
:
index
=
self
.
_rng
.
choice
(
obs_indices
)
if
self
.
_add_query_noise
:
xytheta
=
self
.
_perturb_state
(
observation_states
[
index
],
self
.
_query_noise_var
)
return
self
.
_env
.
observation
(
xytheta
),
index
,
xytheta
else
:
return
observations
[
index
],
index
,
observation_states
[
index
]
else
:
return
None
,
None
,
None
else
:
xy
=
self
.
_env
.
vertex_to_pose
(
vertex_index
)
xytheta
=
np
.
array
([
xy
[
0
],
xy
[
1
],
0.0
])
xytheta
=
self
.
_perturb_state
(
xytheta
,
self
.
_query_noise_var
)
return
self
.
_env
.
observation
(
xytheta
),
None
,
xytheta
class AreNearbyTask(RandomExplorationBasedTask):
  """A task of identifying whether a query is nearby current location or not.

  The query is guaranteed to be in proximity of an already visited location,
  i.e. close to one of the observations. For each observation we have one
  query, which is either close or not to this observation.
  """

  def __init__(self, max_distance=0, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
    super(AreNearbyTask, self).__init__(*args, **kwargs)
    self._max_distance = max_distance
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: a tuple with one element. This element is a numpy array
        of size in_seq_len x observation_size x observation_size x 3
        containing in_seq_len images.
      query: a numpy array of size
        in_seq_len x observation_size x observation_size x 3 containing one
        query image per observation.
      A tuple of size two. First element is an in_seq_len x 3 numpy array of
        one hot label vectors. The i-th row denotes whether the i-th query
        image is nearby (label 1), visited but not nearby (label 0), or not
        yet visited (label 2) with respect to the i-th observation. The
        second element in the tuple is a mask, a numpy array of size
        in_seq_len x 1 with values 1.0 or 0.0 denoting whether the query is
        valid or not (it can happen that the query is not valid, e.g. there
        are not enough observations to have a meaningful query).
    """
    observations, states, path = self._exploration()
    assert len(observations.values()[0]) == len(states)

    # The observations are taken along a smoothed trajectory following the
    # path. We compute a mapping between the observations and the map
    # vertices.
    path_to_obs, obs_to_path = self._obs_to_state(path, states)

    # Go over all observations, and sample a query. With probability 0.5 this
    # query is a nearby observation (defined as belonging to the same vertex
    # in path).
    g = self._env.graph
    queries = []
    labels = []
    validity_masks = []
    query_index_in_observations = []
    for i, curr_o in enumerate(observations.values()[0]):
      p = obs_to_path[i]
      low = max(0, i - self._max_distance)
      # A list of lists of vertex indices. Each list in this group
      # corresponds to one possible label.
      index_groups = [[], [], []]
      # Nearby visited indices, label 1.
      nearby_visited = [
          ii for ii in path[low:i + 1] + g[p].keys() if ii in obs_to_path[:i]
      ]
      nearby_visited = [ii for ii in nearby_visited if ii in path_to_obs]
      # NOT nearby visited indices, label 0.
      not_nearby_visited = [ii for ii in path[:low] if ii not in g[p].keys()]
      not_nearby_visited = [
          ii for ii in not_nearby_visited if ii in path_to_obs
      ]
      # NOT visited indices, label 2.
      not_visited = [
          ii for ii in range(g.number_of_nodes()) if ii not in path[:i + 1]
      ]
      index_groups = [not_nearby_visited, nearby_visited, not_visited]

      # Consider only labels for which there are indices.
      allowed_labels = [ii for ii, group in enumerate(index_groups) if group]
      label = self._rng.choice(allowed_labels)

      indices = list(set(index_groups[label]))
      max_obs_index = None if label == 2 else i
      use_exploration_obs = False if label == 2 else True
      o, obs_index, _ = self._sample_obs(
          indices=indices,
          observations=observations.values()[0],
          observation_states=states,
          path_to_obs=path_to_obs,
          max_obs_index=max_obs_index,
          use_exploration_obs=use_exploration_obs)
      query_index_in_observations.append(obs_index)

      # If we cannot sample a valid query, we mark it as not valid in mask.
      if o is None:
        label = 0.0
        o = curr_o
        validity_masks.append(0)
      else:
        validity_masks.append(1)

      queries.append(o.values()[0])
      labels.append(label)

    query = np.concatenate([np.expand_dims(q, axis=0) for q in queries],
                           axis=0)

    def one_hot(label, num_labels=3):
      a = np.zeros((num_labels,), dtype=np.float)
      a[int(label)] = 1.0
      return a

    outputs = np.stack([one_hot(l) for l in labels], axis=0)
    validity_mask = np.reshape(
        np.array(validity_masks, dtype=np.int32), [-1, 1])

    self.info['query_index_in_observations'] = query_index_in_observations
    self.info['observation_states'] = states

    return observations, query, (outputs, validity_mask)

  def target_loss(self, truth, predicted, weights=None):
    pass
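
# --- Illustrative sketch (not part of the original file) ---
# How AreNearbyTask.episode above packs its targets: per-step integer labels
# in {0, 1, 2} become stacked one-hot rows and the validity flags become an
# (in_seq_len, 1) int32 mask. The _demo_* names are hypothetical.
import numpy as np

_demo_labels = [1, 0, 2]
_demo_outputs = np.stack(
    [np.eye(3, dtype=np.float32)[int(l)] for l in _demo_labels], axis=0)
_demo_validity_mask = np.reshape(np.array([1, 1, 0], dtype=np.int32), [-1, 1])
# _demo_outputs.shape == (3, 3); row i is the one-hot vector for step i.
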
class NeighboringQueriesTask(RandomExplorationBasedTask):
  """A task of identifying whether two queries are close by or not.

  The proximity between queries is defined by the length of the shortest
  path between them.
  """

  def __init__(self, max_distance=1, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
    """Initializes a NeighboringQueriesTask.

    Args:
      max_distance: integer, the maximum distance in terms of number of
        vertices between the two queries, so that they are considered
        neighboring.
      *args: for super class.
      **kwargs: for super class.
    """
    super(NeighboringQueriesTask, self).__init__(*args, **kwargs)
    self._max_distance = max_distance
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: a tuple with one element. This element is a numpy array
        of size in_seq_len x observation_size x observation_size x 3
        containing in_seq_len images.
      query: a numpy array of size
        2 x observation_size x observation_size x 3 containing a pair of
        query images.
      A tuple of size two. First element is a numpy array of size 2
        containing a one hot vector of whether the two observations are
        neighboring. Second element is a boolean numpy value denoting whether
        this is a valid episode.
    """
    observations, states, path = self._exploration()
    assert len(observations.values()[0]) == len(states)
    path_to_obs, _ = self._obs_to_state(path, states)
    # Restrict path to ones for which observations have been generated.
    path = [p for p in path if p in path_to_obs]
    # Sample first query.
    query1_index = self._rng.choice(path)
    # Sample label.
    label = self._rng.randint(2)
    # Sample second query.
    # If label == 1, then second query must be nearby, otherwise not.
    closest_indices = nx.single_source_shortest_path(
        self._env.graph, query1_index, self._max_distance).keys()
    if label == 0:
      # Indices on the path which are not closest.
      indices = [p for p in path if p not in closest_indices]
    else:
      # Closest indices which are on the path.
      indices = [p for p in closest_indices if p in path]
    query2_index = self._rng.choice(indices)
    # Generate an observation.
    query1, query1_index, _ = self._sample_obs(
        [query1_index],
        observations.values()[0],
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=True)
    query2, query2_index, _ = self._sample_obs(
        [query2_index],
        observations.values()[0],
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=True)

    queries = np.concatenate(
        [np.expand_dims(q, axis=0) for q in [query1, query2]])
    labels = np.array([0, 0])
    labels[label] = 1
    is_valid = np.array([1])

    self.info['observation_states'] = states
    self.info['query_indices_in_observations'] = [query1_index, query2_index]

    return observations, queries, (labels, is_valid)

  def target_loss(self, truth, predicted, weights=None):
    pass
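
# --- Illustrative sketch (not part of the original file) ---
# The neighborhood test used by NeighboringQueriesTask above:
# nx.single_source_shortest_path with a cutoff returns a dict whose keys are
# every vertex reachable within max_distance edges of the source.
import networkx as nx

_demo_g = nx.path_graph(5)  # vertices 0 - 1 - 2 - 3 - 4
_demo_nearby = sorted(nx.single_source_shortest_path(_demo_g, 0, 1))
assert _demo_nearby == [0, 1]  # vertex 2 is not nearby, so it yields label 0
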
#@gin.configurable
class GotoStaticXTask(RandomExplorationBasedTask):
  """Task go to a static X.

  If continuous reward is used only one goal is allowed so that the reward
  can be computed as a delta-distance to that goal.
  """

  def __init__(self,
               step_reward=0.0,
               goal_reward=1.0,
               hit_wall_reward=-1.0,
               done_at_target=False,
               use_continuous_reward=False,
               *args,
               **kwargs):  # pylint: disable=keyword-arg-before-vararg
    super(GotoStaticXTask, self).__init__(*args, **kwargs)
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')
    self._step_reward = step_reward
    self._goal_reward = goal_reward
    self._hit_wall_reward = hit_wall_reward
    self._done_at_target = done_at_target
    self._use_continuous_reward = use_continuous_reward
    self._previous_path_length = None

  def episode(self):
    observations, _, path = self._exploration()
    if len(path) < 2:
      raise ValueError('The exploration path has only one node.')

    g = self._env.graph
    start = path[-1]
    while True:
      goal = self._rng.choice(path[:-1])
      if goal != start:
        break
    goal_path = nx.shortest_path(g, start, goal)

    init_orientation = self._rng.uniform(0, np.pi, (1,))
    trajectory = np.array(
        [list(self._env.vertex_to_pose(p)) for p in goal_path])
    init_xy = np.reshape(trajectory[0, :], [-1])
    init_state = np.concatenate([init_xy, init_orientation], 0)
    trajectory = trajectory[1:, :]
    deltas = envs_util.trajectory_to_deltas(trajectory, init_state)
    output_seq_len = self._config.output.shape[0]
    arr = _pad_or_clip_array(deltas, output_seq_len, output_mask=True)
    # pylint: disable=unbalanced-tuple-unpacking
    thetas, _, thetas_mask = arr

    query = self._env.observation(self._env.vertex_to_pose(goal)).values()[0]
    return observations, query, (thetas, thetas_mask)

  def reward(self, obs, done, info):
    if 'wall_collision' in info and info['wall_collision']:
      return obs, self._hit_wall_reward, done, info
    reward = 0.0
    current_vertex = self._env.pose_to_vertex(self._env.state)
    if current_vertex in self._env.targets():
      if self._done_at_target:
        done = True
      else:
        obs = self._env.reset()
      reward = self._goal_reward
    else:
      if self._use_continuous_reward:
        if len(self._env.targets()) != 1:
          raise ValueError(
              'FindX task with continuous reward is assuming only one target.')
        goal_vertex = self._env.targets()[0]
        path_length = self._compute_path_length(goal_vertex)
        reward = self._previous_path_length - path_length
        self._previous_path_length = path_length
      else:
        reward = self._step_reward
    return obs, reward, done, info

  def _compute_path_length(self, goal_vertex):
    current_vertex = self._env.pose_to_vertex(self._env.state)
    path = nx.shortest_path(self._env.graph, current_vertex, goal_vertex)
    assert len(path) >= 2
    curr_xy = np.array(self._env.state[:2])
    next_xy = np.array(self._env.vertex_to_pose(path[1]))
    last_step_distance = np.linalg.norm(next_xy - curr_xy)
    return (len(path) - 2) * self._env.cell_size_px + last_step_distance

  def reset(self, observation):
    if self._use_continuous_reward:
      if len(self._env.targets()) != 1:
        raise ValueError(
            'FindX task with continuous reward is assuming only one target.')
      goal_vertex = self._env.targets()[0]
      self._previous_path_length = self._compute_path_length(goal_vertex)

  def target_loss(self, truth, predicted, weights=None):
    """Action classification loss.

    Args:
      truth: a batch_size x sequence length x number of labels float
        Tensor containing a one hot vector for each label in each batch and
        time.
      predicted: a batch_size x sequence length x number of labels float
        Tensor containing a predicted distribution over all actions.
      weights: a batch_size x sequence_length float Tensor of bool
        denoting which actions are valid.

    Returns:
      An average cross entropy over all batches and elements in sequence.
    """
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
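
# --- Illustrative sketch (not part of the original file) ---
# The continuous reward in GotoStaticXTask.reward above is a delta-distance:
# at each step the agent earns the decrease in path length to the single
# goal since the previous step. The _demo_* values are hypothetical.
_demo_path_lengths = [5.0, 4.0, 4.0, 2.5]  # length to goal after each step
_demo_rewards = [
    prev - curr
    for prev, curr in zip(_demo_path_lengths[:-1], _demo_path_lengths[1:])
]
assert _demo_rewards == [1.0, 0.0, 1.5]  # progress toward the goal is positive
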
class RelativeLocationTask(RandomExplorationBasedTask):
  """A task of estimating the relative location of a query w.r.t. current.

  It is to be used for debugging. It is designed such that the output is a
  single value, out of a discrete set of values, so that it can be phrased
  as a classification problem.
  """

  def __init__(self, num_labels, *args, **kwargs):
    """Initializes a relative location task.

    Args:
      num_labels: integer, number of orientations to bin the relative
        orientation into.
      *args: see super class.
      **kwargs: see super class.
    """
    super(RelativeLocationTask, self).__init__(*args, **kwargs)
    self._num_labels = num_labels
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    observations, states, path = self._exploration()

    # Select a random element from history.
    path_to_obs, _ = self._obs_to_state(path, states)
    use_exploration_obs = not self._add_query_noise
    query, _, query_state = self._sample_obs(
        path[:-1],
        observations.values()[0],
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=use_exploration_obs)

    x, y, theta = tuple(states[-1])
    q_x, q_y, _ = tuple(query_state)
    t_x, t_y = q_x - x, q_y - y
    (rt_x, rt_y) = (np.sin(theta) * t_x - np.cos(theta) * t_y,
                    np.cos(theta) * t_x + np.sin(theta) * t_y)
    # Bins are [a(i), a(i+1)] for a(i) = -pi + 0.5 * bin_size + i * bin_size.
    shift = np.pi * (1 - 1.0 / (2.0 * self._num_labels))
    orientation = np.arctan2(rt_y, rt_x) + shift
    if orientation < 0:
      orientation += 2 * np.pi
    label = int(np.floor(self._num_labels * orientation / (2 * np.pi)))

    out_shape = self._config.output.shape
    if len(out_shape) != 1:
      raise ValueError('Output shape should be of rank 1.')
    if out_shape[0] != self._num_labels:
      raise ValueError('Output shape must be of size %d' % self._num_labels)
    output = np.zeros(out_shape, dtype=np.float32)
    output[label] = 1

    return observations, query, (output, None)

  def target_loss(self, truth, predicted, weights=None):
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
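
# --- Illustrative sketch (not part of the original file) ---
# The angle binning in RelativeLocationTask.episode above: bins are
# [a(i), a(i+1)] with a(i) = -pi + 0.5 * bin_size + i * bin_size, so the
# shift centers the bin boundaries between the num_labels headings.
import numpy as np

def _demo_orientation_bin(rt_x, rt_y, num_labels):
  shift = np.pi * (1 - 1.0 / (2.0 * num_labels))
  orientation = np.arctan2(rt_y, rt_x) + shift
  if orientation < 0:
    orientation += 2 * np.pi
  return int(np.floor(num_labels * orientation / (2 * np.pi)))

assert _demo_orientation_bin(1.0, 0.0, 4) == 1  # straight ahead, 4 bins
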
class LocationClassificationTask(UnrolledTask):
  """A task of classifying a location as one of several classes.

  The task does not have an input, but just a query and an output. The query
  is an observation of the current location, e.g. an image taken from the
  current state. The output is a label classifying this location as one of a
  predefined set of locations (or landmarks).

  The current implementation classifies locations as intersections based on
  the number and directions of bifurcations. It is expected that a location
  can have at most 4 different directions, aligned with the axes. As each of
  these four directions might be present or not, the number of possible
  intersections is 2^4 = 16.
  """

  def __init__(self, env, seed, *args, **kwargs):
    super(LocationClassificationTask, self).__init__(*args, **kwargs)
    self._env = env
    self._rng = np.random.RandomState(seed)
    # A location property which can be set. If not set, a random one is
    # generated.
    self._location = None
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

  @property
  def location(self):
    return self._location

  @location.setter
  def location(self, location):
    self._location = location

  def episode(self):
    # Get a location. If not set, sample one at a vertex with a random
    # orientation.
    location = self._location
    if location is None:
      num_nodes = self._env.graph.number_of_nodes()
      vertex = int(math.floor(self._rng.uniform(0, num_nodes)))
      xy = self._env.vertex_to_pose(vertex)
      theta = self._rng.uniform(0, 2 * math.pi)
      location = np.concatenate(
          [np.reshape(xy, [-1]), np.array([theta])], axis=0)
    else:
      vertex = self._env.pose_to_vertex(location)
      theta = location[2]

    neighbors = self._env.graph.neighbors(vertex)
    xy_s = [self._env.vertex_to_pose(n) for n in neighbors]

    def rotate(xy, theta):
      """Rotates a vector around the origin by angle theta.

      Args:
        xy: a numpy darray of shape (2, ) of floats containing the x and y
          coordinates of a vector.
        theta: a python float containing the rotation angle in radians.

      Returns:
        A numpy darray of floats of shape (2,) containing the x and y
        coordinates of the rotated xy.
      """
      rotated_x = np.cos(theta) * xy[0] - np.sin(theta) * xy[1]
      rotated_y = np.sin(theta) * xy[0] + np.cos(theta) * xy[1]
      return np.array([rotated_x, rotated_y])

    # Rotate all intersection bifurcations by the orientation of the agent
    # as the intersection label is defined in an agent centered fashion.
    xy_s = [
        rotate(xy - location[0:2], -location[2] - math.pi / 4) for xy in xy_s
    ]
    th_s = [np.arctan2(xy[1], xy[0]) for xy in xy_s]

    out_shape = self._config.output.shape
    if len(out_shape) != 1:
      raise ValueError('Output shape should be of rank 1.')
    num_labels = out_shape[0]
    if num_labels != 16:
      raise ValueError('Currently only 16 labels are supported '
                       '(there are 16 different 4 way intersection types).')

    th_s = set(
        [int(math.floor(4 * (th / (2 * np.pi) + 0.5))) for th in th_s])
    one_hot_label = np.zeros((num_labels,), dtype=np.float32)
    label = 0
    for th in th_s:
      label += pow(2, th)
    one_hot_label[int(label)] = 1.0

    query = self._env.observation(location).values()[0]
    return [], query, (one_hot_label, None)

  def reward(self, obs, done, info):
    raise ValueError('Do not call.')

  def target_loss(self, truth, predicted, weights=None):
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
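
# --- Illustrative sketch (not part of the original file) ---
# The intersection encoding in LocationClassificationTask.episode above:
# each of the four axis-aligned directions contributes one bit, giving
# 2^4 = 16 classes. The _demo_* values are hypothetical.
import numpy as np

_demo_directions = {0, 1, 3}  # e.g. a junction open in three directions
_demo_label = sum(pow(2, th) for th in _demo_directions)  # 1 + 2 + 8 = 11
_demo_one_hot = np.zeros((16,), dtype=np.float32)
_demo_one_hot[_demo_label] = 1.0
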
class GotoStaticXNoExplorationTask(UnrolledTask):
  """An interface for findX tasks without exploration.

  The agent is initialized at a random location in a random world with a
  random goal, and the objective is for the agent to move toward the goal.
  This class generates episodes for such a task. Each episode generates a
  sequence of observations x and target outputs y. x is the observations and
  is an OrderedDict with keys provided from config.inputs.keys() and the
  shapes provided in config.inputs. The output is a numpy array with the
  shape specified in config.output. The shape of the array is
  (sequence_length x action_size) where action_size is the number of actions
  that can be done in the environment. Note that config.output.shape should
  be set according to the number of actions that can be done in the env.

  Target outputs y are the groundtruth value of each action that is computed
  from the environment graph. The target output for each action is
  proportional to the progress that each action makes. A target value of 1
  means that the action takes the agent one step closer, -1 means the action
  takes the agent one step farther. A value of -2 means that the action
  should not take place at all. This can be because the action leads to
  collision or because it would terminate the episode prematurely.
  """

  def __init__(self, env, *args, **kwargs):
    super(GotoStaticXNoExplorationTask, self).__init__(*args, **kwargs)

    if self._config.query is not None:
      raise ValueError('query should be None.')
    if len(self._config.output.shape) != 2:
      raise ValueError('output should only have two dimensions: '
                       '(sequence_length x number_of_actions)')
    for input_config in self._config.inputs.values():
      if input_config.shape[0] != self._config.output.shape[0]:
        raise ValueError('the first dimension of the input and output '
                         'should be the same.')
    if len(self._config.output.shape) != 2:
      raise ValueError('output shape should be '
                       '(sequence_length x number_of_actions)')

    self._env = env

  def _compute_shortest_path_length(self, vertex, target_vertices):
    """Computes length of the shortest path from vertex to any target vertex.

    Args:
      vertex: integer, index of the vertex in the environment graph.
      target_vertices: list of the target vertices.

    Returns:
      integer, minimum distance from the vertex to any of the
      target_vertices.

    Raises:
      ValueError: if there is no path between the vertex and at least one of
        the target_vertices.
    """
    try:
      return np.min([
          len(nx.shortest_path(self._env.graph, vertex, t))
          for t in target_vertices
      ])
    except:
      # logging.error('there is no path between vertex %d and at least one '
      #               'of the targets %r', vertex, target_vertices)
      raise

  def _compute_gt_value(self, vertex, target_vertices):
    """Computes groundtruth value of all the actions at the vertex.

    The value of each action is the difference each action makes in the
    length of the shortest path to the goal. If an action takes the agent
    one step closer to the goal the value is 1. In case it takes the agent
    one step away from the goal it would be -1. If it leads to collision or
    if the agent uses action stop before reaching the goal it is -2. To
    avoid scale issues the gt_values are multiplied by 0.5.

    Args:
      vertex: integer, the index of current vertex.
      target_vertices: list of the integer indexes of the target views.

    Returns:
      numpy array with shape (action_size,) where each element is the
      groundtruth value of that action based on the progress it makes.
    """
    action_size = self._config.output.shape[1]
    output_value = np.ones((action_size), dtype=np.float32) * -2
    my_distance = self._compute_shortest_path_length(vertex, target_vertices)
    for adj in self._env.graph[vertex]:
      adj_distance = self._compute_shortest_path_length(adj, target_vertices)
      if adj_distance is None:
        continue
      action_index = self._env.action(
          self._env.vertex_to_pose(vertex), self._env.vertex_to_pose(adj))
      assert action_index is not None, ('{} is not adjacent to {}. There '
                                        'might be a problem in environment '
                                        'graph connectivity because there is '
                                        'no direct edge between the given '
                                        'vertices').format(
                                            self._env.vertex_to_pose(vertex),
                                            self._env.vertex_to_pose(adj))
      output_value[action_index] = my_distance - adj_distance

    return output_value * 0.5

  def episode(self):
    """Returns data needed to train and test a single episode.

    Returns:
      (inputs, None, output) where inputs is a dictionary of modality types
      to numpy arrays. The second element is the query but we assume that
      the goal is also given as part of the observation, so it should be
      None for this task. The output is the tuple of ground truth action
      values with the shape of (sequence_length x action_size) that is
      coming from config.output.shape and a numpy array with the shape of
      (sequence_length,) that is 1 if the corresponding element of the input
      and output should be used in the training optimization.

    Raises:
      ValueError: If the output values for env.random_step_sequence are not
        valid.
      ValueError: If the shape of observations coming from the env is not
        consistent with the config.
      ValueError: If there is a modality type specified in the config but
        the environment does not return it.
    """
    # Sequence length is the first dimension of any of the input tensors.
    sequence_length = self._config.inputs.values()[0].shape[0]
    modality_types = self._config.inputs.keys()

    path, _, _, step_outputs = self._env.random_step_sequence(
        max_len=sequence_length)
    target_vertices = [
        self._env.pose_to_vertex(x) for x in self._env.targets()
    ]

    if len(path) != len(step_outputs):
      raise ValueError('path, and step_outputs should have equal length'
                       ' {}!={}'.format(len(path), len(step_outputs)))

    # Building up observations. observations will be an OrderedDict of
    # modality types. The values are numpy arrays that follow the given
    # shape in the input config for each modality type.
    observations = collections.OrderedDict([k, []] for k in modality_types)
    for step_output in step_outputs:
      obs_dict = step_output[0]
      # Only going over the modality types that are specified in the input
      # config.
      for modality_type in modality_types:
        if modality_type not in obs_dict:
          raise ValueError('modality type is not returned from the '
                           'environment. {} not in {}'.format(
                               modality_type, obs_dict.keys()))
        obs = obs_dict[modality_type]
        if np.any(
            obs.shape != tuple(self._config.inputs[modality_type].shape[1:])):
          raise ValueError(
              'The observations should have the same size as specified in '
              'config for modality type {}. {} != {}'.format(
                  modality_type, obs.shape,
                  self._config.inputs[modality_type].shape[1:]))
        observations[modality_type].append(obs)

    gt_value = [self._compute_gt_value(v, target_vertices) for v in path]

    # pylint: disable=unbalanced-tuple-unpacking
    gt_value, _, value_mask = _pad_or_clip_array(
        np.array(gt_value),
        sequence_length,
        is_front_clip=False,
        output_mask=True,
    )
    for modality_type, obs in observations.iteritems():
      observations[modality_type], _, mask = _pad_or_clip_array(
          np.array(obs), sequence_length, is_front_clip=False,
          output_mask=True)
      assert np.all(mask == value_mask)

    return observations, None, (gt_value, value_mask)

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true
    and predicted ones.

    Args:
      true_targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      weights: tf.Tensor of tf.bool with the shape of
        (batch_size x sequence_length).

    Raises:
      ValueError: if the shapes of the input tensors are not consistent.

    Returns:
      L2 loss between the predicted action values and true action values.
    """
    targets_shape = targets.get_shape().as_list()
    true_targets_shape = true_targets.get_shape().as_list()
    if len(targets_shape) != 3 or len(true_targets_shape) != 3:
      raise ValueError('invalid shape for targets or true_targets_shape')
    if np.any(targets_shape != true_targets_shape):
      raise ValueError('the shape of targets and true_targets are not the '
                       'same {} != {}'.format(targets_shape,
                                              true_targets_shape))

    if weights is not None:
      # Filtering targets and true_targets using weights.
      weights_shape = weights.get_shape().as_list()
      if np.any(weights_shape != targets_shape[0:2]):
        raise ValueError('The first two elements of weights shape should '
                         'match target. {} != {}'.format(
                             weights_shape, targets_shape))
      true_targets = tf.boolean_mask(true_targets, weights)
      targets = tf.boolean_mask(targets, weights)

    return tf.losses.mean_squared_error(
        tf.reshape(targets, [-1]), tf.reshape(true_targets, [-1]))

  def reward(self, obs, done, info):
    raise NotImplementedError('reward is not implemented for this task')
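
# --- Illustrative sketch (not part of the original file) ---
# _compute_gt_value above on a toy chain graph: moving to an adjacent vertex
# is worth 0.5 * (distance from here - distance from there), so stepping
# toward the goal scores 0.5 and stepping away scores -0.5.
import networkx as nx

_demo_g = nx.path_graph(4)  # vertices 0 - 1 - 2 - 3, goal at vertex 3

def _demo_dist(v):
  return len(nx.shortest_path(_demo_g, v, 3))

_demo_values = {adj: 0.5 * (_demo_dist(1) - _demo_dist(adj))
                for adj in _demo_g[1]}
assert _demo_values == {0: -0.5, 2: 0.5}
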
################################################################################
class NewTask(UnrolledTask):

  def __init__(self, env, *args, **kwargs):
    super(NewTask, self).__init__(*args, **kwargs)
    self._env = env

  def _compute_shortest_path_length(self, vertex, target_vertices):
    """Computes length of the shortest path from vertex to any target vertex.

    Args:
      vertex: integer, index of the vertex in the environment graph.
      target_vertices: list of the target vertices.

    Returns:
      integer, minimum distance from the vertex to any of the
      target_vertices.

    Raises:
      ValueError: if there is no path between the vertex and at least one of
        the target_vertices.
    """
    try:
      return np.min([
          len(nx.shortest_path(self._env.graph, vertex, t))
          for t in target_vertices
      ])
    except:
      logging.error('there is no path between vertex %d and at least one of '
                    'the targets %r', vertex, target_vertices)
      raise

  def _compute_gt_value(self, vertex, target_vertices):
    """Computes groundtruth value of all the actions at the vertex.

    The value of each action is the difference each action makes in the
    length of the shortest path to the goal. If an action takes the agent
    one step closer to the goal the value is 1. In case it takes the agent
    one step away from the goal it would be -1. If it leads to collision or
    if the agent uses action stop before reaching the goal it is -2. To
    avoid scale issues the gt_values are multiplied by 0.5.

    Args:
      vertex: integer, the index of current vertex.
      target_vertices: list of the integer indexes of the target views.

    Returns:
      numpy array with shape (action_size,) where each element is the
      groundtruth value of that action based on the progress it makes.
    """
    action_size = self._config.output.shape[1]
    output_value = np.ones((action_size), dtype=np.float32) * -2
    # Own compute: _compute_shortest_path_length returns a float.
    my_distance = self._compute_shortest_path_length(vertex, target_vertices)
    for adj in self._env.graph[vertex]:
      adj_distance = self._compute_shortest_path_length(adj, target_vertices)
      if adj_distance is None:
        continue
      action_index = self._env.action(
          self._env.vertex_to_pose(vertex), self._env.vertex_to_pose(adj))
      assert action_index is not None, ('{} is not adjacent to {}. There '
                                        'might be a problem in environment '
                                        'graph connectivity because there is '
                                        'no direct edge between the given '
                                        'vertices').format(
                                            self._env.vertex_to_pose(vertex),
                                            self._env.vertex_to_pose(adj))
      output_value[action_index] = my_distance - adj_distance

    return output_value * 0.5

  def episode(self):
    """Returns data needed to train and test a single episode.

    Returns:
      (inputs, None, output) where inputs is a dictionary of modality types
      to numpy arrays. The second element is the query but we assume that
      the goal is also given as part of the observation, so it should be
      None for this task. The output is the tuple of ground truth action
      values with the shape of (sequence_length x action_size) that is
      coming from config.output.shape and a numpy array with the shape of
      (sequence_length,) that is 1 if the corresponding element of the input
      and output should be used in the training optimization.

    Raises:
      ValueError: If the output values for env.random_step_sequence are not
        valid.
      ValueError: If the shape of observations coming from the env is not
        consistent with the config.
      ValueError: If there is a modality type specified in the config but
        the environment does not return it.
    """
    # Sequence length is the first dimension of any of the input tensors.
    sequence_length = self._config.inputs.values()[0].shape[0]
    modality_types = self._config.inputs.keys()

    path, _, _, step_outputs = self._env.random_step_sequence(
        max_len=sequence_length)
    target_vertices = [
        self._env.pose_to_vertex(x) for x in self._env.targets()
    ]

    if len(path) != len(step_outputs):
      raise ValueError('path, and step_outputs should have equal length'
                       ' {}!={}'.format(len(path), len(step_outputs)))

    # Building up observations. observations will be an OrderedDict of
    # modality types. The values are numpy arrays that follow the given
    # shape in the input config for each modality type.
    observations = collections.OrderedDict([k, []] for k in modality_types)
    for step_output in step_outputs:
      obs_dict = step_output[0]
      # Only going over the modality types that are specified in the input
      # config.
      for modality_type in modality_types:
        if modality_type not in obs_dict:
          raise ValueError('modality type is not returned from the '
                           'environment. {} not in {}'.format(
                               modality_type, obs_dict.keys()))
        obs = obs_dict[modality_type]
        if np.any(
            obs.shape != tuple(self._config.inputs[modality_type].shape[1:])):
          raise ValueError(
              'The observations should have the same size as specified in '
              'config for modality type {}. {} != {}'.format(
                  modality_type, obs.shape,
                  self._config.inputs[modality_type].shape[1:]))
        observations[modality_type].append(obs)

    gt_value = [self._compute_gt_value(v, target_vertices) for v in path]

    # pylint: disable=unbalanced-tuple-unpacking
    gt_value, _, value_mask = _pad_or_clip_array(
        np.array(gt_value),
        sequence_length,
        is_front_clip=False,
        output_mask=True,
    )
    for modality_type, obs in observations.iteritems():
      observations[modality_type], _, mask = _pad_or_clip_array(
          np.array(obs), sequence_length, is_front_clip=False,
          output_mask=True)
      assert np.all(mask == value_mask)

    return observations, None, (gt_value, value_mask)

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true
    and predicted ones.

    Args:
      true_targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      weights: tf.Tensor of tf.bool with the shape of
        (batch_size x sequence_length).

    Raises:
      ValueError: if the shapes of the input tensors are not consistent.

    Returns:
      L2 loss between the predicted action values and true action values.
    """
    targets_shape = targets.get_shape().as_list()
    true_targets_shape = true_targets.get_shape().as_list()
    if len(targets_shape) != 3 or len(true_targets_shape) != 3:
      raise ValueError('invalid shape for targets or true_targets_shape')
    if np.any(targets_shape != true_targets_shape):
      raise ValueError('the shape of targets and true_targets are not the '
                       'same {} != {}'.format(targets_shape,
                                              true_targets_shape))

    if weights is not None:
      # Filtering targets and true_targets using weights.
      weights_shape = weights.get_shape().as_list()
      if np.any(weights_shape != targets_shape[0:2]):
        raise ValueError('The first two elements of weights shape should '
                         'match target. {} != {}'.format(
                             weights_shape, targets_shape))
      true_targets = tf.boolean_mask(true_targets, weights)
      targets = tf.boolean_mask(targets, weights)

    return tf.losses.mean_squared_error(
        tf.reshape(targets, [-1]), tf.reshape(true_targets, [-1]))

  def reward(self, obs, done, info):
    raise NotImplementedError('reward is not implemented for this task')
research/cognitive_planning/train_supervised_active_vision.py
deleted
100644 → 0
View file @
09bc9f54
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=line-too-long
# pyformat: disable
"""Train and eval for supervised navigation training.
For training:
python train_supervised_active_vision.py
\
--mode='train'
\
--logdir=$logdir/checkin_log_det/
\
--modality_types='det'
\
--batch_size=8
\
--train_iters=200000
\
--lstm_cell_size=2048
\
--policy_fc_size=2048
\
--sequence_length=20
\
--max_eval_episode_length=100
\
--test_iters=194
\
--gin_config=envs/configs/active_vision_config.gin
\
--gin_params='ActiveVisionDatasetEnv.dataset_root="$datadir"'
\
--logtostderr
For testing:
python train_supervised_active_vision.py
--mode='eval'
\
--logdir=$logdir/checkin_log_det/
\
--modality_types='det'
\
--batch_size=8
\
--train_iters=200000
\
--lstm_cell_size=2048
\
--policy_fc_size=2048
\
--sequence_length=20
\
--max_eval_episode_length=100
\
--test_iters=194
\
--gin_config=envs/configs/active_vision_config.gin
\
--gin_params='ActiveVisionDatasetEnv.dataset_root="$datadir"'
\
--logtostderr
"""
import collections
import os
import time

from absl import app
from absl import flags
from absl import logging
import networkx as nx
import numpy as np
import tensorflow as tf
import gin

import embedders
import policies
import tasks
from envs import active_vision_dataset_env
from envs import task_env

slim = tf.contrib.slim
flags.DEFINE_string('logdir', '',
                    'Path to a directory to write summaries and checkpoints')
# Parameters controlling the training setup. In general one would not need
# to modify them.
flags.DEFINE_string('master', 'local',
                    'BNS name of the TensorFlow master, or local.')
flags.DEFINE_integer('task_id', 0,
                     'Task id of the replica running the training.')
flags.DEFINE_integer('ps_tasks', 0,
                     'Number of tasks in the ps job. If 0 no ps job is used.')
flags.DEFINE_integer('decay_steps', 1000,
                     'Number of steps for exponential decay.')
flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate.')
flags.DEFINE_integer('batch_size', 8, 'Batch size.')
flags.DEFINE_integer('sequence_length', 20, 'sequence length')
flags.DEFINE_integer('train_iters', 200000, 'number of training iterations.')
flags.DEFINE_integer('save_summaries_secs', 300,
                     'number of seconds between saving summaries')
flags.DEFINE_integer('save_interval_secs', 300,
                     'number of seconds between saving variables')
flags.DEFINE_integer('log_every_n_steps', 20,
                     'number of steps between logging')
flags.DEFINE_string('modality_types', '',
                    'modality names in _ separated format')
flags.DEFINE_string('conv_window_sizes', '8_4_3',
                    'conv window sizes separated by _')
flags.DEFINE_string('conv_strides', '4_2_1', '')
flags.DEFINE_string('conv_channels', '8_16_16', '')
flags.DEFINE_integer('embedding_fc_size', 128,
                     'size of embedding for each modality')
flags.DEFINE_integer('obs_resolution', 64,
                     'resolution of the input observations')
flags.DEFINE_integer('lstm_cell_size', 2048, 'size of lstm cell')
flags.DEFINE_integer('policy_fc_size', 2048,
                     'size of fully connected layers for policy part')
flags.DEFINE_float('weight_decay', 0.0002, 'weight decay')
flags.DEFINE_integer('goal_category_count', 5, 'number of goal categories')
flags.DEFINE_integer('action_size', 7, 'number of possible actions')
flags.DEFINE_integer('max_eval_episode_length', 100,
                     'maximum sequence length for evaluation.')
flags.DEFINE_enum('mode', 'train', ['train', 'eval'],
                  'indicates whether it is in training or evaluation')
flags.DEFINE_integer('test_iters', 194,
                     'number of iterations that the eval needs to be run')
flags.DEFINE_multi_string('gin_config', [],
                          'List of paths to gin config files for the env.')
flags.DEFINE_multi_string('gin_params', [],
                          'Newline separated list of Gin parameter bindings.')
flags.DEFINE_string('resnet50_path',
                    './resnet_v2_50_checkpoint/resnet_v2_50.ckpt',
                    'path to resnet50 checkpoint')
flags.DEFINE_bool('freeze_resnet_weights', True, '')
flags.DEFINE_string('eval_init_points_file_name', '',
                    'Name of the file that contains the initial locations '
                    'and worlds for each evaluation point')

FLAGS = flags.FLAGS
TRAIN_WORLDS = [
    'Home_001_1', 'Home_001_2', 'Home_002_1', 'Home_003_1', 'Home_003_2',
    'Home_004_1', 'Home_004_2', 'Home_005_1', 'Home_005_2', 'Home_006_1',
    'Home_010_1'
]

TEST_WORLDS = ['Home_011_1', 'Home_013_1', 'Home_016_1']
def create_modality_types():
  """Parses the modality_types and returns a list of task_env.ModalityType."""
  if not FLAGS.modality_types:
    raise ValueError('there needs to be at least one modality type')
  modality_types = FLAGS.modality_types.split('_')
  for x in modality_types:
    if x not in ['image', 'sseg', 'det', 'depth']:
      raise ValueError('invalid modality type: {}'.format(x))

  conversion_dict = {
      'image': task_env.ModalityTypes.IMAGE,
      'sseg': task_env.ModalityTypes.SEMANTIC_SEGMENTATION,
      'depth': task_env.ModalityTypes.DEPTH,
      'det': task_env.ModalityTypes.OBJECT_DETECTION,
  }
  return [conversion_dict[k] for k in modality_types]
def create_task_io_config(
    modality_types,
    goal_category_count,
    action_size,
    sequence_length,
):
  """Generates task io config."""
  shape_prefix = [sequence_length, FLAGS.obs_resolution, FLAGS.obs_resolution]
  shapes = {
      task_env.ModalityTypes.IMAGE: [sequence_length, 224, 224, 3],
      task_env.ModalityTypes.DEPTH: shape_prefix + [2,],
      task_env.ModalityTypes.SEMANTIC_SEGMENTATION: shape_prefix + [1,],
      task_env.ModalityTypes.OBJECT_DETECTION: shape_prefix + [90,]
  }
  types = {k: tf.float32 for k in shapes}
  types[task_env.ModalityTypes.IMAGE] = tf.uint8
  inputs = collections.OrderedDict(
      [[mtype, (types[mtype], shapes[mtype])] for mtype in modality_types])
  inputs[task_env.ModalityTypes.GOAL] = (
      tf.float32, [sequence_length, goal_category_count])
  inputs[task_env.ModalityTypes.PREV_ACTION] = (
      tf.float32, [sequence_length, action_size + 1])
  print inputs
  return tasks.UnrolledTaskIOConfig(
      inputs=inputs,
      output=(tf.float32, [sequence_length, action_size]),
      query=None)
def map_to_embedder(modality_type):
  """Maps modality_type to its corresponding embedder."""
  if modality_type == task_env.ModalityTypes.PREV_ACTION:
    return None
  if modality_type == task_env.ModalityTypes.GOAL:
    return embedders.IdentityEmbedder()
  if modality_type == task_env.ModalityTypes.IMAGE:
    return embedders.ResNet50Embedder()
  conv_window_sizes = [int(x) for x in FLAGS.conv_window_sizes.split('_')]
  conv_channels = [int(x) for x in FLAGS.conv_channels.split('_')]
  conv_strides = [int(x) for x in FLAGS.conv_strides.split('_')]
  params = tf.contrib.training.HParams(
      to_one_hot=modality_type == task_env.ModalityTypes.SEMANTIC_SEGMENTATION,
      one_hot_length=10,
      conv_sizes=conv_window_sizes,
      conv_strides=conv_strides,
      conv_channels=conv_channels,
      embedding_size=FLAGS.embedding_fc_size,
      weight_decay_rate=FLAGS.weight_decay,
  )
  return embedders.SmallNetworkEmbedder(params)
def create_train_and_init_ops(policy, task):
  """Creates training ops given the arguments.

  Args:
    policy: the policy for the task.
    task: the task instance.

  Returns:
    train_op: the op that needs to be run at each step.
    init_fn: the op that initializes the variables if there is no previous
      checkpoint. If Resnet50 is not used in the model it is None, otherwise
      it reads the weights from FLAGS.resnet50_path and sets the init_fn
      to the op that initializes the ResNet50 with the pre-trained weights.
  """
  assert isinstance(task, tasks.GotoStaticXNoExplorationTask)
  assert isinstance(policy, policies.Policy)

  inputs, _, gt_outputs, masks = task.tf_episode_batch(FLAGS.batch_size)
  outputs, _ = policy.build(inputs, None)
  loss = task.target_loss(gt_outputs, outputs, masks)

  init_fn = None

  # If resnet is added to the graph, init_fn should initialize resnet
  # weights if there is no previous checkpoint.
  variables_assign_dict = {}
  vars_list = []
  for v in slim.get_model_variables():
    if v.name.find('resnet') >= 0:
      if not FLAGS.freeze_resnet_weights:
        vars_list.append(v)
      variables_assign_dict[v.name[v.name.find('resnet'):-2]] = v
    else:
      vars_list.append(v)

  global_step = tf.train.get_or_create_global_step()
  learning_rate = tf.train.exponential_decay(
      FLAGS.learning_rate,
      global_step,
      decay_steps=FLAGS.decay_steps,
      decay_rate=0.98,
      staircase=True)
  optimizer = tf.train.AdamOptimizer(learning_rate)
  train_op = slim.learning.create_train_op(
      loss,
      optimizer,
      global_step=global_step,
      variables_to_train=vars_list,
  )

  if variables_assign_dict:
    init_fn = slim.assign_from_checkpoint_fn(
        FLAGS.resnet50_path,
        variables_assign_dict,
        ignore_missing_vars=False)
  scalar_summaries = {}
  scalar_summaries['LR'] = learning_rate
  scalar_summaries['loss'] = loss
  for name, summary in scalar_summaries.iteritems():
    tf.summary.scalar(name, summary)

  return train_op, init_fn
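
# --- Illustrative sketch (not part of the original file) ---
# The staircase schedule configured above: with staircase=True,
# tf.train.exponential_decay computes
# learning_rate * decay_rate ** floor(global_step / decay_steps).
def _demo_decayed_lr(base_lr, global_step, decay_steps, decay_rate=0.98):
  return base_lr * decay_rate ** (global_step // decay_steps)

# After 5000 steps with decay_steps=1000: 0.0001 * 0.98**5 ~= 9.04e-05.
assert abs(_demo_decayed_lr(0.0001, 5000, 1000) - 0.0001 * 0.98 ** 5) < 1e-12
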
def create_eval_ops(policy, config, possible_targets):
  """Creates the necessary ops for evaluation."""
  inputs_feed = collections.OrderedDict([[
      mtype,
      tf.placeholder(config.inputs[mtype].type,
                     [1] + config.inputs[mtype].shape)
  ] for mtype in config.inputs])
  inputs_feed[task_env.ModalityTypes.PREV_ACTION] = tf.placeholder(
      tf.float32, [1, 1] + [config.output.shape[-1] + 1,])
  prev_state_feed = [
      tf.placeholder(
          tf.float32, [1, FLAGS.lstm_cell_size],
          name='prev_state_{}'.format(i)) for i in range(2)
  ]
  policy_outputs = policy.build(inputs_feed, prev_state_feed)
  summary_feed = {}
  for c in possible_targets + ['mean']:
    summary_feed[c] = tf.placeholder(
        tf.float32, [], name='eval_in_range_{}_input'.format(c))
    tf.summary.scalar('eval_in_range_{}'.format(c), summary_feed[c])

  return inputs_feed, prev_state_feed, policy_outputs, (
      tf.summary.merge_all(), summary_feed)
def unroll_policy_for_eval(
    sess,
    env,
    inputs_feed,
    prev_state_feed,
    policy_outputs,
    number_of_steps,
    output_folder,
):
  """Unrolls the policy for testing.

  Args:
    sess: tf.Session
    env: The environment.
    inputs_feed: dictionary of placeholders for the input modalities.
    prev_state_feed: placeholder for the input to the prev_state of the
      model.
    policy_outputs: tensor that contains outputs of the policy.
    number_of_steps: maximum number of unrolling steps.
    output_folder: output_folder where the function writes a dictionary of
      detailed information about the path. The dictionary keys are 'states'
      and 'distance'. The value for 'states' is the list of states that the
      agent goes along the path. The value for 'distance' contains the
      length of the shortest path to the goal at each step.

  Returns:
    states: list of states along the path.
    distance: list of distances along the path.
  """
  prev_state = [
      np.zeros((1, FLAGS.lstm_cell_size), dtype=np.float32) for _ in range(2)
  ]
  prev_action = np.zeros((1, 1, FLAGS.action_size + 1), dtype=np.float32)
  obs = env.reset()
  distances_to_goal = []
  states = []
  unique_id = '{}_{}'.format(env.cur_image_id(), env.goal_string)
  for _ in range(number_of_steps):
    distances_to_goal.append(
        np.min([
            len(
                nx.shortest_path(env.graph, env.pose_to_vertex(env.state()),
                                 env.pose_to_vertex(target_view)))
            for target_view in env.targets()
        ]))
    states.append(env.state())
    feed_dict = {inputs_feed[mtype]: [[obs[mtype]]] for mtype in inputs_feed}
    feed_dict[prev_state_feed[0]] = prev_state[0]
    feed_dict[prev_state_feed[1]] = prev_state[1]
    action_values, prev_state = sess.run(policy_outputs, feed_dict=feed_dict)
    chosen_action = np.argmax(action_values[0])
    obs, _, done, info = env.step(np.int32(chosen_action))
    prev_action[0][0][chosen_action] = 1.
    prev_action[0][0][-1] = float(info['success'])
    # If the agent chooses action stop or the number of steps exceeded
    # env._episode_length.
    if done:
      break

  # logging.info('distance = %d, id = %s, #steps = %d', distances_to_goal[-1],
  output_path = os.path.join(output_folder, unique_id + '.npy')
  with tf.gfile.Open(output_path, 'w') as f:
    print 'saving path information to {}'.format(output_path)
    np.save(f, {'states': states, 'distance': distances_to_goal})
  return states, distances_to_goal
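
# --- Illustrative sketch (not part of the original file) ---
# The previous-action feedback built in unroll_policy_for_eval above: a
# one-hot over action_size actions plus one trailing "success" element.
# The _demo_* values are hypothetical.
import numpy as np

_demo_action_size = 7
_demo_prev_action = np.zeros((1, 1, _demo_action_size + 1), dtype=np.float32)
_demo_chosen_action, _demo_success = 3, True
_demo_prev_action[0][0][_demo_chosen_action] = 1.
_demo_prev_action[0][0][-1] = float(_demo_success)
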
def init(sequence_length, eval_init_points_file_name, worlds):
  """Initializes the common operations between train and test."""
  modality_types = create_modality_types()
  logging.info('modality types: %r', modality_types)
  # A negative reward_goal_range prevents the env from terminating early
  # when the agent is close to the goal. The policy should keep the agent
  # going until the end of the 100 steps, either by choosing the stop action
  # or by oscillating around the target.
  env = active_vision_dataset_env.ActiveVisionDatasetEnv(
      modality_types=modality_types +
      [task_env.ModalityTypes.GOAL, task_env.ModalityTypes.PREV_ACTION],
      reward_goal_range=-1,
      eval_init_points_file_name=eval_init_points_file_name,
      worlds=worlds,
      output_size=FLAGS.obs_resolution,
  )

  config = create_task_io_config(
      modality_types=modality_types,
      goal_category_count=FLAGS.goal_category_count,
      action_size=FLAGS.action_size,
      sequence_length=sequence_length,
  )
  task = tasks.GotoStaticXNoExplorationTask(env=env, config=config)
  embedders_dict = {mtype: map_to_embedder(mtype) for mtype in config.inputs}
  policy_params = tf.contrib.training.HParams(
      lstm_state_size=FLAGS.lstm_cell_size,
      fc_channels=FLAGS.policy_fc_size,
      weight_decay=FLAGS.weight_decay,
      target_embedding_size=FLAGS.embedding_fc_size,
  )
  policy = policies.LSTMPolicy(
      modality_names=config.inputs.keys(),
      embedders_dict=embedders_dict,
      action_size=FLAGS.action_size,
      params=policy_params,
      max_episode_length=sequence_length)
  return env, config, task, policy
def test():
  """Contains all the operations for testing policies."""
  env, config, _, policy = init(1, 'all_init_configs', TEST_WORLDS)
  inputs_feed, prev_state_feed, policy_outputs, summary_op = create_eval_ops(
      policy, config, env.possible_targets)

  sv = tf.train.Supervisor(logdir=FLAGS.logdir)
  prev_checkpoint = None
  with sv.managed_session(
      start_standard_services=False,
      config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    while not sv.should_stop():
      while True:
        new_checkpoint = tf.train.latest_checkpoint(FLAGS.logdir)
        print 'new_checkpoint ', new_checkpoint
        if not new_checkpoint:
          time.sleep(1)
          continue
        if prev_checkpoint is None:
          prev_checkpoint = new_checkpoint
          break
        if prev_checkpoint != new_checkpoint:
          prev_checkpoint = new_checkpoint
          break
        else:
          # If prev_checkpoint == new_checkpoint, we have to wait more.
          time.sleep(1)
      checkpoint_step = int(new_checkpoint[new_checkpoint.rfind('-') + 1:])
      sv.saver.restore(sess, new_checkpoint)
      print '--------------------'
      print 'evaluating checkpoint {}'.format(new_checkpoint)
      folder_path = os.path.join(FLAGS.logdir, 'evals', str(checkpoint_step))
      if not tf.gfile.Exists(folder_path):
        tf.gfile.MakeDirs(folder_path)
      eval_stats = {c: [] for c in env.possible_targets}
      for test_iter in range(FLAGS.test_iters):
        print 'evaluating {} of {}'.format(test_iter, FLAGS.test_iters)
        _, distance_to_goal = unroll_policy_for_eval(
            sess,
            env,
            inputs_feed,
            prev_state_feed,
            policy_outputs,
            FLAGS.max_eval_episode_length,
            folder_path,
        )
        print 'goal = {}'.format(env.goal_string)
        eval_stats[env.goal_string].append(float(distance_to_goal[-1] <= 7))
      eval_stats = {k: np.mean(v) for k, v in eval_stats.iteritems()}
      eval_stats['mean'] = np.mean(eval_stats.values())
      print eval_stats
      feed_dict = {summary_op[1][c]: eval_stats[c] for c in eval_stats}
      summary_str = sess.run(summary_op[0], feed_dict=feed_dict)
      writer = sv.summary_writer
      writer.add_summary(summary_str, checkpoint_step)
      writer.flush()
def train():
  _, _, task, policy = init(FLAGS.sequence_length, None, TRAIN_WORLDS)
  print(FLAGS.save_summaries_secs)
  print(FLAGS.save_interval_secs)
  print(FLAGS.logdir)
  with tf.device(
      tf.train.replica_device_setter(
          ps_tasks=FLAGS.ps_tasks, merge_devices=True)):
    train_op, init_fn = create_train_and_init_ops(policy=policy, task=task)
    print(FLAGS.logdir)
    slim.learning.train(
        train_op=train_op,
        init_fn=init_fn,
        logdir=FLAGS.logdir,
        is_chief=FLAGS.task_id == 0,
        number_of_steps=FLAGS.train_iters,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        session_config=tf.ConfigProto(allow_soft_placement=True),
    )


def main(_):
  gin.parse_config_files_and_bindings(FLAGS.gin_config, FLAGS.gin_params)
  if FLAGS.mode == 'train':
    train()
  else:
    test()


if __name__ == '__main__':
  app.run(main)
research/cognitive_planning/train_supervised_active_vision.sh
deleted
100755 → 0
View file @
09bc9f54
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# blaze build -c opt train_supervised_active_vision
# bazel build -c opt --config=cuda --copt=-mavx train_supervised_active_vision && \
bazel-bin/research/cognitive_planning/train_supervised_active_vision \
  --mode='train' \
  --logdir=/usr/local/google/home/kosecka/local_avd_train/ \
  --modality_types='det' \
  --batch_size=8 \
  --train_iters=200000 \
  --lstm_cell_size=2048 \
  --policy_fc_size=2048 \
  --sequence_length=20 \
  --max_eval_episode_length=100 \
  --test_iters=194 \
  --gin_config=envs/configs/active_vision_config.gin \
  --gin_params='ActiveVisionDatasetEnv.dataset_root="/cns/jn-d/home/kosecka/AVD_Minimal/"' \
  --logtostderr
research/cognitive_planning/visualization_utils.py
deleted
100644 → 0
View file @
09bc9f54
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A set of functions that are used for visualization.
These functions often receive an image, perform some visualization on the image.
The functions do not return a value, instead they modify the image itself.
"""
import collections
import functools
# Set headless-friendly backend.
import matplotlib; matplotlib.use('Agg')  # pylint: disable=multiple-statements
import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
import six
import tensorflow as tf

import standard_fields as fields

_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige',
    'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
    'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk',
    'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki',
    'DarkOrange', 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise',
    'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick',
    'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite',
    'Gold', 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed',
    'Ivory', 'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen',
    'LemonChiffon', 'LightBlue', 'LightCoral', 'LightCyan',
    'LightGoldenRodYellow', 'LightGray', 'LightGrey', 'LightGreen',
    'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow',
    'Lime', 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine',
    'MediumOrchid', 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue',
    'MediumSpringGreen', 'MediumTurquoise', 'MediumVioletRed', 'MintCream',
    'MistyRose', 'Moccasin', 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab',
    'Orange', 'OrangeRed', 'Orchid', 'PaleGoldenRod', 'PaleGreen',
    'PaleTurquoise', 'PaleVioletRed', 'PapayaWhip', 'PeachPuff', 'Peru',
    'Pink', 'Plum', 'PowderBlue', 'Purple', 'Red', 'RosyBrown', 'RoyalBlue',
    'SaddleBrown', 'Green', 'SandyBrown', 'SeaGreen', 'SeaShell', 'Sienna',
    'Silver', 'SkyBlue', 'SlateBlue', 'SlateGray', 'SlateGrey', 'Snow',
    'SpringGreen', 'SteelBlue', 'GreenYellow', 'Teal', 'Thistle', 'Tomato',
    'Turquoise', 'Violet', 'Wheat', 'White', 'WhiteSmoke', 'Yellow',
    'YellowGreen'
]
def save_image_array_as_png(image, output_path):
  """Saves an image (represented as a numpy array) to PNG.

  Args:
    image: a numpy array with shape [height, width, 3].
    output_path: path to which image should be written.
  """
  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  with tf.gfile.Open(output_path, 'w') as fid:
    image_pil.save(fid, 'PNG')
def encode_image_array_as_png_str(image):
  """Encodes a numpy array into a PNG string.

  Args:
    image: a numpy array with shape [height, width, 3].

  Returns:
    PNG encoded image string.
  """
  image_pil = Image.fromarray(np.uint8(image))
  output = six.BytesIO()
  image_pil.save(output, format='PNG')
  png_string = output.getvalue()
  output.close()
  return png_string
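
# --- Illustrative usage sketch (not part of the original file) ---
# Encode a small blank image with the function above and check the PNG
# signature at the start of the returned (Python 2) string.
import numpy as np

_demo_png = encode_image_array_as_png_str(np.zeros((4, 4, 3), dtype=np.uint8))
assert _demo_png[:4] == '\x89PNG'
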
def draw_bounding_box_on_image_array(image,
                                     ymin,
                                     xmin,
                                     ymax,
                                     xmax,
                                     color='red',
                                     thickness=4,
                                     display_str_list=(),
                                     use_normalized_coordinates=True):
  """Adds a bounding box to an image (numpy array).

  Bounding box coordinates can be specified in either absolute (pixel) or
  normalized coordinates by setting the use_normalized_coordinates argument.

  Args:
    image: a numpy array with shape [height, width, 3].
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
      coordinates as absolute.
  """
  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
                             thickness, display_str_list,
                             use_normalized_coordinates)
  np.copyto(image, np.array(image_pil))
def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=4,
                               display_str_list=(),
                               use_normalized_coordinates=True):
  """Adds a bounding box to an image.

  Bounding box coordinates can be specified in either absolute (pixel) or
  normalized coordinates by setting the use_normalized_coordinates argument.

  Each string in display_str_list is displayed on a separate line above the
  bounding box in black text on a rectangle filled with the input 'color'.
  If the top of the bounding box extends to the edge of the image, the strings
  are displayed below the bounding box.

  Args:
    image: a PIL.Image object.
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
      coordinates as absolute.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  if use_normalized_coordinates:
    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
  else:
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=thickness,
            fill=color)
  try:
    font = ImageFont.truetype('arial.ttf', 24)
  except IOError:
    font = ImageFont.load_default()

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = bottom + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    text_width, text_height = font.getsize(display_str)
    margin = np.ceil(0.05 * text_height)
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin),
         (left + text_width, text_bottom)],
        fill=color)
    draw.text((left + margin, text_bottom - text_height - margin),
              display_str,
              fill='black',
              font=font)
    text_bottom -= text_height - 2 * margin
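

# --- Usage sketch (not part of the original file): drawing one normalized,
# labeled box onto a numpy image in place; the blank frame and label are
# placeholders. ---
frame = np.zeros((480, 640, 3), dtype=np.uint8)  # hypothetical blank frame
draw_bounding_box_on_image_array(
    frame,
    ymin=0.25, xmin=0.25, ymax=0.75, xmax=0.75,
    color='LimeGreen',
    thickness=2,
    display_str_list=['dog: 87%'])
# `frame` now contains the rendered box; nothing is returned.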
def draw_bounding_boxes_on_image_array(image,
                                       boxes,
                                       color='red',
                                       thickness=4,
                                       display_str_list_list=()):
  """Draws bounding boxes on image (numpy array).

  Args:
    image: a numpy array object.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
      The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: a list of lists of strings; one list of strings
      for each bounding box. A list is passed per box because a box might
      carry multiple labels.

  Raises:
    ValueError: if boxes is not a [N, 4] array.
  """
  image_pil = Image.fromarray(image)
  draw_bounding_boxes_on_image(image_pil, boxes, color, thickness,
                               display_str_list_list)
  np.copyto(image, np.array(image_pil))
def draw_bounding_boxes_on_image(image,
                                 boxes,
                                 color='red',
                                 thickness=4,
                                 display_str_list_list=()):
  """Draws bounding boxes on image.

  Args:
    image: a PIL.Image object.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
      The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: a list of lists of strings; one list of strings
      for each bounding box. A list is passed per box because a box might
      carry multiple labels.

  Raises:
    ValueError: if boxes is not a [N, 4] array.
  """
  boxes_shape = boxes.shape
  if not boxes_shape:
    return
  if len(boxes_shape) != 2 or boxes_shape[1] != 4:
    raise ValueError('Input must be of size [N, 4]')
  for i in range(boxes_shape[0]):
    display_str_list = ()
    if display_str_list_list:
      display_str_list = display_str_list_list[i]
    draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2],
                               boxes[i, 3], color, thickness, display_str_list)
def _visualize_boxes(image, boxes, classes, scores, category_index, **kwargs):
  return visualize_boxes_and_labels_on_image_array(
      image, boxes, classes, scores, category_index=category_index, **kwargs)


def _visualize_boxes_and_masks(image, boxes, classes, scores, masks,
                               category_index, **kwargs):
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      instance_masks=masks,
      **kwargs)


def _visualize_boxes_and_keypoints(image, boxes, classes, scores, keypoints,
                                   category_index, **kwargs):
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      keypoints=keypoints,
      **kwargs)


def _visualize_boxes_and_masks_and_keypoints(
    image, boxes, classes, scores, masks, keypoints, category_index, **kwargs):
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      instance_masks=masks,
      keypoints=keypoints,
      **kwargs)
def draw_bounding_boxes_on_image_tensors(images,
                                         boxes,
                                         classes,
                                         scores,
                                         category_index,
                                         instance_masks=None,
                                         keypoints=None,
                                         max_boxes_to_draw=20,
                                         min_score_thresh=0.2,
                                         use_normalized_coordinates=True):
  """Draws bounding boxes, masks, and keypoints on batch of image tensors.

  Args:
    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
      channels will be ignored.
    boxes: [N, max_detections, 4] float32 tensor of detection boxes.
    classes: [N, max_detections] int tensor of detection classes. Note that
      classes are 1-indexed.
    scores: [N, max_detections] float32 tensor of detection scores.
    category_index: a dict that maps integer ids to category dicts. e.g.
      {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
    instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
      instance masks.
    keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
      with keypoints.
    max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
    min_score_thresh: Minimum score threshold for visualization. Default 0.2.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.

  Returns:
    4D image tensor of type uint8, with boxes drawn on top.
  """
  # Additional channels are being ignored.
  images = images[:, :, :, 0:3]
  visualization_keyword_args = {
      'use_normalized_coordinates': use_normalized_coordinates,
      'max_boxes_to_draw': max_boxes_to_draw,
      'min_score_thresh': min_score_thresh,
      'agnostic_mode': False,
      'line_thickness': 4
  }

  if instance_masks is not None and keypoints is None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_masks,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [images, boxes, classes, scores, instance_masks]
  elif instance_masks is None and keypoints is not None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_keypoints,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [images, boxes, classes, scores, keypoints]
  elif instance_masks is not None and keypoints is not None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_masks_and_keypoints,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [images, boxes, classes, scores, instance_masks, keypoints]
  else:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [images, boxes, classes, scores]

  def draw_boxes(image_and_detections):
    """Draws boxes on image."""
    image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections,
                                  tf.uint8)
    return image_with_boxes

  images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False)
  return images
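

# --- Usage sketch (not part of the original file): a minimal TF1 graph-mode
# example of the batched drawing helper; shapes, scores, and the category
# index below are all placeholders. ---
example_images = tf.zeros([2, 480, 640, 3], dtype=tf.uint8)    # [N, H, W, C]
example_boxes = tf.constant([[[0.1, 0.1, 0.5, 0.5]]] * 2)      # [N, 1, 4]
example_classes = tf.ones([2, 1], dtype=tf.int32)              # 1-indexed
example_scores = tf.fill([2, 1], 0.9)
example_category_index = {1: {'id': 1, 'name': 'dog'}}         # hypothetical
annotated = draw_bounding_boxes_on_image_tensors(
    example_images, example_boxes, example_classes, example_scores,
    example_category_index)
# `annotated` is a [2, 480, 640, 3] uint8 tensor, e.g. for tf.summary.image.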
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True):
  """Creates a side-by-side image with detections and groundtruth.

  Bounding boxes (and instance masks, if available) are visualized on both
  subimages.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.

  Returns:
    A [1, H, 2 * W, C] uint8 tensor. The subimage on the left corresponds to
    detections, while the subimage on the right corresponds to groundtruth.
  """
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()
  instance_masks = None
  if detection_fields.detection_masks in eval_dict:
    instance_masks = tf.cast(
        tf.expand_dims(eval_dict[detection_fields.detection_masks], axis=0),
        tf.uint8)
  keypoints = None
  if detection_fields.detection_keypoints in eval_dict:
    keypoints = tf.expand_dims(
        eval_dict[detection_fields.detection_keypoints], axis=0)
  groundtruth_instance_masks = None
  if input_data_fields.groundtruth_instance_masks in eval_dict:
    groundtruth_instance_masks = tf.cast(
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_instance_masks], axis=0),
        tf.uint8)
  images_with_detections = draw_bounding_boxes_on_image_tensors(
      eval_dict[input_data_fields.original_image],
      tf.expand_dims(eval_dict[detection_fields.detection_boxes], axis=0),
      tf.expand_dims(eval_dict[detection_fields.detection_classes], axis=0),
      tf.expand_dims(eval_dict[detection_fields.detection_scores], axis=0),
      category_index,
      instance_masks=instance_masks,
      keypoints=keypoints,
      max_boxes_to_draw=max_boxes_to_draw,
      min_score_thresh=min_score_thresh,
      use_normalized_coordinates=use_normalized_coordinates)
  images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
      eval_dict[input_data_fields.original_image],
      tf.expand_dims(eval_dict[input_data_fields.groundtruth_boxes], axis=0),
      tf.expand_dims(eval_dict[input_data_fields.groundtruth_classes], axis=0),
      tf.expand_dims(
          tf.ones_like(
              eval_dict[input_data_fields.groundtruth_classes],
              dtype=tf.float32),
          axis=0),
      category_index,
      instance_masks=groundtruth_instance_masks,
      keypoints=None,
      max_boxes_to_draw=None,
      min_score_thresh=0.0,
      use_normalized_coordinates=use_normalized_coordinates)
  return tf.concat([images_with_detections, images_with_groundtruth], axis=2)
def draw_keypoints_on_image_array(image,
                                  keypoints,
                                  color='red',
                                  radius=2,
                                  use_normalized_coordinates=True):
  """Draws keypoints on an image (numpy array).

  Args:
    image: a numpy array with shape [height, width, 3].
    keypoints: a numpy array with shape [num_keypoints, 2].
    color: color to draw the keypoints with. Default is red.
    radius: keypoint radius. Default value is 2.
    use_normalized_coordinates: if True (default), treat keypoint values as
      relative to the image. Otherwise treat them as absolute.
  """
  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_keypoints_on_image(image_pil, keypoints, color, radius,
                          use_normalized_coordinates)
  np.copyto(image, np.array(image_pil))
def draw_keypoints_on_image(image,
                            keypoints,
                            color='red',
                            radius=2,
                            use_normalized_coordinates=True):
  """Draws keypoints on an image.

  Args:
    image: a PIL.Image object.
    keypoints: a numpy array with shape [num_keypoints, 2].
    color: color to draw the keypoints with. Default is red.
    radius: keypoint radius. Default value is 2.
    use_normalized_coordinates: if True (default), treat keypoint values as
      relative to the image. Otherwise treat them as absolute.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  keypoints_x = [k[1] for k in keypoints]
  keypoints_y = [k[0] for k in keypoints]
  if use_normalized_coordinates:
    keypoints_x = tuple([im_width * x for x in keypoints_x])
    keypoints_y = tuple([im_height * y for y in keypoints_y])
  for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
    draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
                  (keypoint_x + radius, keypoint_y + radius)],
                 outline=color, fill=color)
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
  """Draws mask on an image.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3).
    mask: a uint8 numpy array of shape (img_height, img_width) with values
      of either 0 or 1.
    color: color to draw the mask with. Default is red.
    alpha: transparency value between 0 and 1. (default: 0.4)

  Raises:
    ValueError: On incorrect data type for image or masks.
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  if np.any(np.logical_and(mask != 1, mask != 0)):
    raise ValueError('`mask` elements should be in [0, 1]')
  if image.shape[:2] != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (image.shape[:2], mask.shape))
  rgb = ImageColor.getrgb(color)
  pil_image = Image.fromarray(image)

  solid_color = np.expand_dims(
      np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
  pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
  pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L')
  pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
  np.copyto(image, np.array(pil_image.convert('RGB')))
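

# --- Usage sketch (not part of the original file): alpha-blending a binary
# mask into an image in place; the tiny image and mask are placeholders. ---
masked_image = np.zeros((4, 6, 3), dtype=np.uint8)  # hypothetical image
binary_mask = np.zeros((4, 6), dtype=np.uint8)
binary_mask[1:3, 2:5] = 1  # mark a small rectangular region
draw_mask_on_image_array(masked_image, binary_mask, color='Blue', alpha=0.5)
# The masked pixels of `masked_image` are now blended halfway toward blue.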
def visualize_boxes_and_labels_on_image_array(
    image,
    boxes,
    classes,
    scores,
    category_index,
    instance_masks=None,
    instance_boundaries=None,
    keypoints=None,
    use_normalized_coordinates=False,
    max_boxes_to_draw=20,
    min_score_thresh=.5,
    agnostic_mode=False,
    line_thickness=4,
    groundtruth_box_visualization_color='black',
    skip_scores=False,
    skip_labels=False):
  """Overlay labeled boxes on an image with formatted scores and label names.

  This function groups boxes that correspond to the same location,
  creates a display string for each detection, and overlays these on the
  image. Note that this function modifies the image in place and returns
  that same image.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3).
    boxes: a numpy array of shape [N, 4].
    classes: a numpy array of shape [N]. Note that class indices are 1-based,
      and match the keys in the label map.
    scores: a numpy array of shape [N] or None. If scores=None, then
      this function assumes that the boxes to be plotted are groundtruth
      boxes and plots all boxes as black with no classes or scores.
    category_index: a dict containing category dictionaries (each holding
      category index `id` and category name `name`) keyed by category indices.
    instance_masks: a numpy array of shape [N, image_height, image_width] with
      values ranging between 0 and 1, can be None.
    instance_boundaries: a numpy array of shape [N, image_height, image_width]
      with values ranging between 0 and 1, can be None.
    keypoints: a numpy array of shape [N, num_keypoints, 2], can be None.
    use_normalized_coordinates: whether boxes are to be interpreted as
      normalized coordinates or not.
    max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
      all boxes.
    min_score_thresh: minimum score threshold for a box to be visualized.
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not. This mode will display scores but ignore
      classes.
    line_thickness: integer (default: 4) controlling line width of the boxes.
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes.
    skip_scores: whether to skip score when drawing a single detection.
    skip_labels: whether to skip label when drawing a single detection.

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3) with overlaid
    boxes.
  """
  # Create a display string (and color) for every box location, group any boxes
  # that correspond to the same location.
  box_to_display_str_map = collections.defaultdict(list)
  box_to_color_map = collections.defaultdict(str)
  box_to_instance_masks_map = {}
  box_to_instance_boundaries_map = {}
  box_to_keypoints_map = collections.defaultdict(list)
  if not max_boxes_to_draw:
    max_boxes_to_draw = boxes.shape[0]
  for i in range(min(max_boxes_to_draw, boxes.shape[0])):
    if scores is None or scores[i] > min_score_thresh:
      box = tuple(boxes[i].tolist())
      if instance_masks is not None:
        box_to_instance_masks_map[box] = instance_masks[i]
      if instance_boundaries is not None:
        box_to_instance_boundaries_map[box] = instance_boundaries[i]
      if keypoints is not None:
        box_to_keypoints_map[box].extend(keypoints[i])
      if scores is None:
        box_to_color_map[box] = groundtruth_box_visualization_color
      else:
        display_str = ''
        if not skip_labels:
          if not agnostic_mode:
            if classes[i] in category_index.keys():
              class_name = category_index[classes[i]]['name']
            else:
              class_name = 'N/A'
            display_str = str(class_name)
        if not skip_scores:
          if not display_str:
            display_str = '{}%'.format(int(100 * scores[i]))
          else:
            display_str = '{}: {}%'.format(display_str, int(100 * scores[i]))
        box_to_display_str_map[box].append(display_str)
        if agnostic_mode:
          box_to_color_map[box] = 'DarkOrange'
        else:
          box_to_color_map[box] = STANDARD_COLORS[
              classes[i] % len(STANDARD_COLORS)]

  # Draw all boxes onto image.
  for box, color in box_to_color_map.items():
    ymin, xmin, ymax, xmax = box
    if instance_masks is not None:
      draw_mask_on_image_array(
          image,
          box_to_instance_masks_map[box],
          color=color)
    if instance_boundaries is not None:
      draw_mask_on_image_array(
          image,
          box_to_instance_boundaries_map[box],
          color='red',
          alpha=1.0)
    draw_bounding_box_on_image_array(
        image,
        ymin,
        xmin,
        ymax,
        xmax,
        color=color,
        thickness=line_thickness,
        display_str_list=box_to_display_str_map[box],
        use_normalized_coordinates=use_normalized_coordinates)
    if keypoints is not None:
      draw_keypoints_on_image_array(
          image,
          box_to_keypoints_map[box],
          color=color,
          radius=line_thickness / 2,
          use_normalized_coordinates=use_normalized_coordinates)

  return image
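

# --- Usage sketch (not part of the original file): an end-to-end, hypothetical
# example of the main entry point; the image, detections, and category index
# are placeholders. ---
vis_image = np.zeros((480, 640, 3), dtype=np.uint8)
vis_boxes = np.array([[0.1, 0.2, 0.6, 0.8]])    # [N, 4], normalized
vis_classes = np.array([1])                     # 1-based class ids
vis_scores = np.array([0.92])
vis_category_index = {1: {'id': 1, 'name': 'cat'}}
visualize_boxes_and_labels_on_image_array(
    vis_image, vis_boxes, vis_classes, vis_scores, vis_category_index,
    use_normalized_coordinates=True, min_score_thresh=0.5)
# `vis_image` now shows one box labeled 'cat: 92%'; it could be written out
# with save_image_array_as_png(vis_image, '/tmp/vis.png').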
def add_cdf_image_summary(values, name):
  """Adds a tf.summary.image for a CDF plot of the values.

  Normalizes `values` such that they sum to 1, plots the cumulative
  distribution function and creates a tf image summary.

  Args:
    values: a 1-D float32 tensor containing the values.
    name: name for the image summary.
  """

  def cdf_plot(values):
    """Numpy function to plot CDF."""
    normalized_values = values / np.sum(values)
    sorted_values = np.sort(normalized_values)
    cumulative_values = np.cumsum(sorted_values)
    fraction_of_examples = (
        np.arange(cumulative_values.size, dtype=np.float32) /
        cumulative_values.size)
    fig = plt.figure(frameon=False)
    ax = fig.add_subplot('111')
    ax.plot(fraction_of_examples, cumulative_values)
    ax.set_ylabel('cumulative normalized values')
    ax.set_xlabel('fraction of examples')
    fig.canvas.draw()
    width, height = fig.get_size_inches() * fig.get_dpi()
    image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
        1, int(height), int(width), 3)
    return image

  cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8)
  tf.summary.image(name, cdf_plot)
def add_hist_image_summary(values, bins, name):
  """Adds a tf.summary.image for a histogram plot of the values.

  Plots the histogram of values and creates a tf image summary.

  Args:
    values: a 1-D float32 tensor containing the values.
    bins: bin edges which will be directly passed to np.histogram.
    name: name for the image summary.
  """

  def hist_plot(values, bins):
    """Numpy function to plot hist."""
    fig = plt.figure(frameon=False)
    ax = fig.add_subplot('111')
    y, x = np.histogram(values, bins=bins)
    ax.plot(x[:-1], y)
    ax.set_ylabel('count')
    ax.set_xlabel('value')
    fig.canvas.draw()
    width, height = fig.get_size_inches() * fig.get_dpi()
    image = np.fromstring(
        fig.canvas.tostring_rgb(), dtype='uint8').reshape(
            1, int(height), int(width), 3)
    return image

  hist_plot = tf.py_func(hist_plot, [values, bins], tf.uint8)
  tf.summary.image(name, hist_plot)
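

# --- Usage sketch (not part of the original file): registering the two image
# summaries above in a TF1 graph; the value tensor and bin edges are
# placeholders. ---
summary_values = tf.constant(np.random.rand(100).astype(np.float32))
add_cdf_image_summary(summary_values, name='values_cdf')
add_hist_image_summary(summary_values, bins=np.linspace(0., 1., 11),
                       name='values_hist')
# Both calls add image summaries to the default graph; evaluating the merged
# summaries in a session makes them visible in TensorBoard.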
research/cognitive_planning/viz_active_vision_dataset_main.py
deleted
100644 → 0
View file @
09bc9f54
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Initializes at random location and visualizes the optimal path.
Different modes of execution:
1) benchmark: It generates benchmark_iter sample trajectory to random goals
and plots the histogram of path lengths. It can be also used to see how fast
it runs.
2) vis: It visualizes the generated paths by image, semantic segmentation, and
so on.
3) human: allows the user to navigate through environment from keyboard input.
python viz_active_vision_dataset_main --
\
--mode=benchmark --benchmark_iter=1000 --gin_config=envs/configs/active_vision_config.gin
python viz_active_vision_dataset_main --
\
--mode=vis
\
--gin_config=envs/configs/active_vision_config.gin
python viz_active_vision_dataset_main --
\
--mode=human
\
--gin_config=envs/configs/active_vision_config.gin
python viz_active_vision_dataset_main.py --mode=eval --eval_folder=/usr/local/google/home/$USER/checkin_log_det/evals/ --output_folder=/usr/local/google/home/$USER/test_imgs/ --gin_config=envs/configs/active_vision_config.gin
"""
import matplotlib
# pylint: disable=g-import-not-at-top
# Need Tk for interactive plots.
matplotlib.use('TkAgg')
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
import os
from pyglib import app
from pyglib import flags
import gin
import cv2
from envs import active_vision_dataset_env
from envs import task_env

VIS_MODE = 'vis'
HUMAN_MODE = 'human'
BENCHMARK_MODE = 'benchmark'
GRAPH_MODE = 'graph'
EVAL_MODE = 'eval'

flags.DEFINE_enum('mode', VIS_MODE,
                  [VIS_MODE, HUMAN_MODE, BENCHMARK_MODE, GRAPH_MODE,
                   EVAL_MODE], 'mode of the execution')
flags.DEFINE_integer('benchmark_iter', 1000,
                     'number of iterations for benchmarking')
flags.DEFINE_string('eval_folder', '', 'the path to the eval folder')
flags.DEFINE_string('output_folder', '',
                    'the path to which the images and gifs are written')
flags.DEFINE_multi_string('gin_config', [],
                          'List of paths to gin config files for the env.')
flags.DEFINE_multi_string('gin_params', [],
                          'Newline separated list of Gin parameter bindings.')

mt = task_env.ModalityTypes
FLAGS = flags.FLAGS
def benchmark(env, targets):
  """Benchmarks the speed of sequence generation by env.

  Args:
    env: environment.
    targets: list of target classes.
  """
  episode_lengths = {}
  all_init_configs = {}
  all_actions = dict([(a, 0.) for a in env.actions])
  for i in range(FLAGS.benchmark_iter):
    path, actions, _, _ = env.random_step_sequence()
    selected_actions = np.argmax(actions, axis=-1)
    new_actions = dict([(a, 0.) for a in env.actions])
    for a in selected_actions:
      new_actions[env.actions[a]] += 1. / selected_actions.shape[0]
    for a in new_actions:
      all_actions[a] += new_actions[a] / FLAGS.benchmark_iter
    start_image_id, world, goal = env.get_init_config(path)
    print world
    if world not in all_init_configs:
      all_init_configs[world] = set()
    all_init_configs[world].add((start_image_id, goal, len(actions)))
    if env.goal_index not in episode_lengths:
      episode_lengths[env.goal_index] = []
    episode_lengths[env.goal_index].append(len(actions))
  for i, cls in enumerate(episode_lengths):
    plt.subplot(231 + i)
    plt.hist(episode_lengths[cls])
    plt.title(targets[cls])
  plt.show()
def human(env, targets):
  """Lets the user play around the env manually."""
  string_key_map = {
      'a': 'left',
      'd': 'right',
      'w': 'forward',
      's': 'backward',
      'j': 'rotate_ccw',
      'l': 'rotate_cw',
      'n': 'stop'
  }
  integer_key_map = {
      'a': env.actions.index('left'),
      'd': env.actions.index('right'),
      'w': env.actions.index('forward'),
      's': env.actions.index('backward'),
      'j': env.actions.index('rotate_ccw'),
      'l': env.actions.index('rotate_cw'),
      'n': env.actions.index('stop')
  }
  for k in integer_key_map:
    integer_key_map[k] = np.int32(integer_key_map[k])
  plt.ion()
  for _ in range(20):
    obs = env.reset()
    steps = -1
    action = None
    while True:
      print 'distance = ', obs[task_env.ModalityTypes.DISTANCE]
      steps += 1
      depth_value = obs[task_env.ModalityTypes.DEPTH][:, :, 0]
      depth_mask = obs[task_env.ModalityTypes.DEPTH][:, :, 1]
      seg_mask = np.squeeze(obs[task_env.ModalityTypes.SEMANTIC_SEGMENTATION])
      det_mask = np.argmax(
          obs[task_env.ModalityTypes.OBJECT_DETECTION], axis=-1)
      img = obs[task_env.ModalityTypes.IMAGE]
      plt.subplot(231)
      plt.title('steps = {}'.format(steps))
      plt.imshow(img.astype(np.uint8))
      plt.subplot(232)
      plt.imshow(depth_value)
      plt.title('depth value')
      plt.subplot(233)
      plt.imshow(depth_mask)
      plt.title('depth mask')
      plt.subplot(234)
      plt.imshow(seg_mask)
      plt.title('seg')
      plt.subplot(235)
      plt.imshow(det_mask)
      plt.title('det')
      plt.subplot(236)
      plt.title('goal={}'.format(targets[env.goal_index]))
      plt.draw()
      while True:
        s = raw_input('key = ')
        if np.random.rand() > 0.5:
          key_map = string_key_map
        else:
          key_map = integer_key_map
        if s in key_map:
          action = key_map[s]
          break
        else:
          print 'invalid action'
      print 'action = {}'.format(action)
      if action == 'stop':
        print 'dist to goal: {}'.format(len(env.path_to_goal()) - 2)
        break
      obs, reward, done, info = env.step(action)
      print 'reward = {}, done = {}, success = {}'.format(
          reward, done, info['success'])
def visualize_random_step_sequence(env):
  """Visualizes a random sequence of steps."""
  plt.ion()
  for _ in range(20):
    path, actions, _, step_outputs = env.random_step_sequence(max_len=30)
    print 'path = {}'.format(path)
    for action, step_output in zip(actions, step_outputs):
      obs, _, done, _ = step_output
      depth_value = obs[task_env.ModalityTypes.DEPTH][:, :, 0]
      depth_mask = obs[task_env.ModalityTypes.DEPTH][:, :, 1]
      seg_mask = np.squeeze(obs[task_env.ModalityTypes.SEMANTIC_SEGMENTATION])
      det_mask = np.argmax(
          obs[task_env.ModalityTypes.OBJECT_DETECTION], axis=-1)
      img = obs[task_env.ModalityTypes.IMAGE]
      plt.subplot(231)
      plt.imshow(img.astype(np.uint8))
      plt.subplot(232)
      plt.imshow(depth_value)
      plt.title('depth value')
      plt.subplot(233)
      plt.imshow(depth_mask)
      plt.title('depth mask')
      plt.subplot(234)
      plt.imshow(seg_mask)
      plt.title('seg')
      plt.subplot(235)
      plt.imshow(det_mask)
      plt.title('det')
      plt.subplot(236)
      print 'action = {}'.format(action)
      print 'done = {}'.format(done)
      plt.draw()
      if raw_input('press \'n\' to go to the next random sequence. Otherwise, '
                   'press any key to continue...') == 'n':
        break
def visualize(env, input_folder, output_root_folder):
  """Visualizes images for sequences of steps from the evals folder."""

  def which_env(file_name):
    img_name = file_name.split('_')[0][2:5]
    env_dict = {'161': 'Home_016_1', '131': 'Home_013_1', '111': 'Home_011_1'}
    if img_name in env_dict:
      return env_dict[img_name]
    else:
      raise ValueError('could not resolve env: {} {}'.format(img_name,
                                                             file_name))

  def which_goal(file_name):
    return file_name[file_name.find('_') + 1:]

  output_images_folder = os.path.join(output_root_folder, 'images')
  output_gifs_folder = os.path.join(output_root_folder, 'gifs')
  if not tf.gfile.IsDirectory(output_images_folder):
    tf.gfile.MakeDirs(output_images_folder)
  if not tf.gfile.IsDirectory(output_gifs_folder):
    tf.gfile.MakeDirs(output_gifs_folder)
  npy_files = [
      os.path.join(input_folder, name)
      for name in tf.gfile.ListDirectory(input_folder)
      if name.find('npy') >= 0
  ]
  for i, npy_file in enumerate(npy_files):
    print 'saving images {}/{}'.format(i, len(npy_files))
    pure_name = npy_file[npy_file.rfind('/') + 1:-4]
    output_folder = os.path.join(output_images_folder, pure_name)
    if not tf.gfile.IsDirectory(output_folder):
      tf.gfile.MakeDirs(output_folder)
    print '*******'
    print pure_name[0:pure_name.find('_')]
    env.reset_for_eval(
        which_env(pure_name),
        which_goal(pure_name),
        pure_name[0:pure_name.find('_')],
    )
    with tf.gfile.Open(npy_file) as h:
      states = np.load(h).item()['states']
    images = [env.observation(state)[mt.IMAGE] for state in states]
    for j, img in enumerate(images):
      cv2.imwrite(
          os.path.join(output_folder, '{0:03d}'.format(j) + '.jpg'),
          img[:, :, ::-1])
    print 'converting to gif'
    # The joined gif path already carries the .gif extension, so the format
    # placeholder must not append a second one.
    os.system('convert -set delay 20 -colors 256 -dispose 1 {}/*.jpg {}'
              .format(output_folder,
                      os.path.join(output_gifs_folder, pure_name + '.gif')))
def evaluate_folder(env, folder_path):
  """Evaluates the performance from the evals folder."""
  targets = ['fridge', 'dining_table', 'microwave', 'tv', 'couch']

  def compute_acc(npy_file):
    with tf.gfile.Open(npy_file) as h:
      data = np.load(h).item()
    if npy_file.find('dining_table') >= 0:
      category = 'dining_table'
    else:
      category = npy_file[npy_file.rfind('_') + 1:-4]
    return category, data['distance'][-1] - 2

  def evaluate_iteration(folder):
    """Evaluates the data from the folder of a certain eval iteration."""
    print folder
    npy_files = [
        os.path.join(folder, name)
        for name in tf.gfile.ListDirectory(folder)
        if name.find('npy') >= 0
    ]
    eval_stats = {c: [] for c in targets}
    for npy_file in npy_files:
      try:
        category, dist = compute_acc(npy_file)
      except:  # pylint: disable=bare-except
        continue
      eval_stats[category].append(float(dist <= 5))
    for c in eval_stats:
      if not eval_stats[c]:
        print 'incomplete eval {}: empty class {}'.format(folder_path, c)
        return None
      eval_stats[c] = np.mean(eval_stats[c])
    eval_stats['mean'] = np.mean(eval_stats.values())
    return eval_stats

  checkpoint_folders = [
      folder_path + x
      for x in tf.gfile.ListDirectory(folder_path)
      if tf.gfile.IsDirectory(folder_path + x)
  ]
  print '{} folders found'.format(len(checkpoint_folders))
  print '------------------------'
  all_iters = []
  all_accs = []
  for i, folder in enumerate(checkpoint_folders):
    print 'processing {}/{}'.format(i, len(checkpoint_folders))
    eval_stats = evaluate_iteration(folder)
    if eval_stats is None:
      continue
    else:
      iter_no = int(folder[folder.rfind('/') + 1:])
      print 'result ', iter_no, eval_stats['mean']
      all_accs.append(eval_stats['mean'])
      all_iters.append(iter_no)
  all_accs = np.asarray(all_accs)
  all_iters = np.asarray(all_iters)
  idx = np.argmax(all_accs)
  print 'best result at iteration {} was {}'.format(all_iters[idx],
                                                    all_accs[idx])
  order = np.argsort(all_iters)
  all_iters = all_iters[order]
  all_accs = all_accs[order]
  #plt.plot(all_iters, all_accs)
  #plt.show()
  #print 'done plotting'
  best_iteration_folder = os.path.join(folder_path, str(all_iters[idx]))
  print 'generating gifs and images for {}'.format(best_iteration_folder)
  visualize(env, best_iteration_folder, FLAGS.output_folder)
def main(_):
  gin.parse_config_files_and_bindings(FLAGS.gin_config, FLAGS.gin_params)
  print('********')
  print(FLAGS.mode)
  print(FLAGS.gin_config)
  print(FLAGS.gin_params)
  env = active_vision_dataset_env.ActiveVisionDatasetEnv(modality_types=[
      task_env.ModalityTypes.IMAGE,
      task_env.ModalityTypes.SEMANTIC_SEGMENTATION,
      task_env.ModalityTypes.OBJECT_DETECTION, task_env.ModalityTypes.DEPTH,
      task_env.ModalityTypes.DISTANCE
  ])
  if FLAGS.mode == BENCHMARK_MODE:
    benchmark(env, env.possible_targets)
  elif FLAGS.mode == GRAPH_MODE:
    for loc in env.worlds:
      env.check_scene_graph(loc, 'fridge')
  elif FLAGS.mode == HUMAN_MODE:
    human(env, env.possible_targets)
  elif FLAGS.mode == VIS_MODE:
    visualize_random_step_sequence(env)
  elif FLAGS.mode == EVAL_MODE:
    evaluate_folder(env, FLAGS.eval_folder)


if __name__ == '__main__':
  app.run(main)
research/compression/README.md
deleted
100644 → 0
View file @
09bc9f54



# Compression with Neural Networks

This is a [TensorFlow](http://www.tensorflow.org/) model repo containing
research on compression with neural networks. This repo currently contains
code for the following papers:

[Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148)

## Organization

[Image Encoder](image_encoder/): Encoding and decoding images into their
binary representation.

[Entropy Coder](entropy_coder/): Lossless compression of the binary
representation.

## Contact Info

Model repository maintained by Nick Johnston ([nmjohn](https://github.com/nmjohn)).
research/compression/entropy_coder/README.md
deleted
100644 → 0
View file @
09bc9f54
# Neural net based entropy coding

This is a [TensorFlow](http://www.tensorflow.org/) model for additional
lossless compression of bitstreams generated by neural net based image
encoders as described in
[https://arxiv.org/abs/1703.10114](https://arxiv.org/abs/1703.10114).

To be more specific, the entropy coder aims at further compressing binary
codes which have a 3D tensor structure with:

* the first two dimensions of the tensors corresponding to the height and
  the width of the binary codes,
* the last dimension being the depth of the codes. The last dimension can be
  sliced into N groups of K, where each additional group is used by the image
  decoder to add more details to the reconstructed image.

The code in this directory only contains the underlying code probability model
but does not perform the actual compression using arithmetic coding.
The code probability model is enough to compute the theoretical compression
ratio.
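
For intuition, here is a small numpy sketch (not part of this repo) of how a
theoretical code length can be derived from per-bit probabilities produced by
such a code probability model; the shapes and probabilities below are made up:

```python
import numpy as np

# Hypothetical binary codes: height=8, width=8, depth = N groups * K bits.
h, w, n_groups, k = 8, 8, 4, 2
codes = np.random.randint(0, 2, size=(h, w, n_groups * k))

# Hypothetical per-bit probabilities P(bit == 1) from a code probability
# model, clipped away from 0 and 1 for numerical safety.
p_one = np.clip(np.random.rand(h, w, n_groups * k), 1e-6, 1.0 - 1e-6)

# Theoretical code length in bits under the model (binary cross-entropy).
model_bits = -(codes * np.log2(p_one) +
               (1 - codes) * np.log2(1.0 - p_one)).sum()
raw_bits = codes.size  # one bit per binary code without entropy coding
print('theoretical compression ratio: {:.3f}'.format(model_bits / raw_bits))
```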
## Prerequisites

The only software requirement for running the encoder and decoder is having
TensorFlow installed.

You will also need to add the top level source directory of the entropy coder
to your `PYTHONPATH`, for example:

`export PYTHONPATH=${PYTHONPATH}:/tmp/models/compression`

## Training the entropy coder

### Synthetic dataset

If you do not have a training dataset, there is a simple code generative model
that you can use to generate a dataset and play with the entropy coder.
The generative model is located under dataset/gen\_synthetic\_dataset.py. Note
that this simple generative model is not going to give good results on real
images, as it is not supposed to be close to the statistics of the binary
representation of encoded images. Consider it a toy dataset, no more, no less.

To generate a synthetic dataset with 20000 samples:

`mkdir -p /tmp/dataset`

`python ./dataset/gen_synthetic_dataset.py --dataset_dir=/tmp/dataset/ --count=20000`

Note that the generator has not been optimized at all; generating the synthetic
dataset is currently pretty slow.

### Training

If you just want to play with the entropy coder trainer, here is the command
line that can be used to train the entropy coder on the synthetic dataset:

`mkdir -p /tmp/entropy_coder_train`

`python ./core/entropy_coder_train.py --task=0 --train_dir=/tmp/entropy_coder_train/ --model=progressive --model_config=./configs/synthetic/model_config.json --train_config=./configs/synthetic/train_config.json --input_config=./configs/synthetic/input_config.json`

Training is configured using 3 files formatted using JSON:

* One file is used to configure the underlying entropy coder model.
  Currently, only the *progressive* model is supported.
  This model takes 2 mandatory parameters and an optional one:
  * `layer_depth`: the number of bits per layer (a.k.a. iteration).
    Background: the image decoder takes each layer to add more detail
    to the image.
  * `layer_count`: the maximum number of layers that should be supported
    by the model. This should be equal to or greater than the maximum number
    of layers in the input binary codes.
  * `coded_layer_count`: this can be used to consider only partial codes,
    keeping only the first `coded_layer_count` layers and ignoring the
    remaining layers. If left empty, the binary codes are left unchanged.
* One file to configure the training, including the learning rate, decay,
  and batch size. The meaning of the parameters is pretty straightforward.
  Note that this file is only used during training and is not needed during
  inference.
* One file to specify the input dataset to use during training.
  The dataset is formatted using tf.RecordIO. (A minimal sketch of loading
  these configuration files follows this list.)
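
As a concrete illustration, here is a minimal sketch (not part of the repo) of
reading the three JSON files with the standard `json` module; the paths match
the synthetic configuration used above:

```python
import json

def load_config(path):
    # Each config is a flat JSON object; see the files under configs/.
    with open(path) as f:
        return json.load(f)

model_cfg = load_config('./configs/synthetic/model_config.json')
train_cfg = load_config('./configs/synthetic/train_config.json')
input_cfg = load_config('./configs/synthetic/input_config.json')
print(model_cfg['layer_depth'], model_cfg['layer_count'])
print(train_cfg['batch_size'], train_cfg['learning_rate'])
print(input_cfg['data'])
```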
## Inference: file size after entropy coding

### Using a synthetic sample

Here is the command line to generate a single synthetic sample formatted
in the same way as what is provided by the image encoder:

`python ./dataset/gen_synthetic_single.py --sample_filename=/tmp/dataset/sample_0000.npz`

To actually compute the additional compression ratio using the entropy coder
trained in the previous step:

`python ./core/entropy_coder_single.py --model=progressive --model_config=./configs/synthetic/model_config.json --input_codes=/tmp/dataset/sample_0000.npz --checkpoint=/tmp/entropy_coder_train/model.ckpt-209078`

where the checkpoint number should be adjusted accordingly.
research/compression/entropy_coder/__init__.py
deleted
100644 → 0
View file @
09bc9f54
research/compression/entropy_coder/all_models/__init__.py
deleted
100644 → 0
View file @
09bc9f54
research/compression/entropy_coder/all_models/all_models.py
deleted
100644 → 0
View file @
09bc9f54
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Import and register all the entropy coder models."""
# pylint: disable=unused-import
from
entropy_coder.progressive
import
progressive
research/compression/entropy_coder/all_models/all_models_test.py
deleted
100644 → 0
View file @
09bc9f54
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Basic test of all registered models."""
import
tensorflow
as
tf
# pylint: disable=unused-import
import
all_models
# pylint: enable=unused-import
from
entropy_coder.model
import
model_factory
class
AllModelsTest
(
tf
.
test
.
TestCase
):
def
testBuildModelForTraining
(
self
):
factory
=
model_factory
.
GetModelRegistry
()
model_names
=
factory
.
GetAvailableModels
()
for
m
in
model_names
:
tf
.
reset_default_graph
()
global_step
=
tf
.
Variable
(
tf
.
zeros
([],
dtype
=
tf
.
int64
),
trainable
=
False
,
name
=
'global_step'
)
optimizer
=
tf
.
train
.
GradientDescentOptimizer
(
learning_rate
=
0.1
)
batch_size
=
3
height
=
40
width
=
20
depth
=
5
binary_codes
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
batch_size
,
height
,
width
,
depth
])
# Create a model with the default configuration.
print
(
'Creating model: {}'
.
format
(
m
))
model
=
factory
.
CreateModel
(
m
)
model
.
Initialize
(
global_step
,
optimizer
,
model
.
GetConfigStringForUnitTest
())
self
.
assertTrue
(
model
.
loss
is
None
,
'model: {}'
.
format
(
m
))
self
.
assertTrue
(
model
.
train_op
is
None
,
'model: {}'
.
format
(
m
))
self
.
assertTrue
(
model
.
average_code_length
is
None
,
'model: {}'
.
format
(
m
))
# Build the Tensorflow graph corresponding to the model.
model
.
BuildGraph
(
binary_codes
)
self
.
assertTrue
(
model
.
loss
is
not
None
,
'model: {}'
.
format
(
m
))
self
.
assertTrue
(
model
.
average_code_length
is
not
None
,
'model: {}'
.
format
(
m
))
if
model
.
train_op
is
None
:
print
(
'Model {} is not trainable'
.
format
(
m
))
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
research/compression/entropy_coder/configs/gru_prime3/model_config.json
deleted
100644 → 0
View file @
09bc9f54
{
  "layer_count": 16,
  "layer_depth": 32
}
research/compression/entropy_coder/configs/synthetic/input_config.json
deleted
100644 → 0
View file @
09bc9f54
{
  "data": "/tmp/dataset/synthetic_dataset",
  "unique_code_size": true
}
research/compression/entropy_coder/configs/synthetic/model_config.json
deleted
100644 → 0
View file @
09bc9f54
{
  "layer_depth": 2,
  "layer_count": 8
}
research/compression/entropy_coder/configs/synthetic/train_config.json
deleted
100644 → 0
View file @
09bc9f54
{
  "batch_size": 4,
  "learning_rate": 0.1,
  "decay_rate": 0.9,
  "samples_per_decay": 20000
}