Commit 42da7864 authored by Christopher Shallue

Move tensorflow_models/research/astronet to google-research/exoplanet-ml

parent 17c2f0cc
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Operations for feeding input data using TensorFlow placeholders."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def prepare_feed_dict(model, features, labels=None, is_training=None):
  """Prepares a feed_dict for sess.run() given a batch of features and labels.

  Args:
    model: An instance of AstroModel.
    features: Dictionary containing "time_series_features" and "aux_features".
      Each is a dictionary of named numpy arrays of shape
      [batch_size, length].
    labels: (Optional). Numpy array of shape [batch_size].
    is_training: (Optional). Python boolean to feed to the model.is_training
      Tensor (if None, no value is fed).

  Returns:
    feed_dict: A dictionary of input Tensor to numpy array.
  """
  feed_dict = {}
  for feature, tensor in model.time_series_features.items():
    feed_dict[tensor] = features["time_series_features"][feature]
  for feature, tensor in model.aux_features.items():
    feed_dict[tensor] = features["aux_features"][feature]

  if labels is not None:
    feed_dict[model.labels] = labels

  if is_training is not None:
    feed_dict[model.is_training] = is_training

  return feed_dict


def build_feature_placeholders(config):
  """Builds tf.Placeholder ops for feeding model features and labels.

  Args:
    config: ConfigDict containing the feature configurations.

  Returns:
    features: A dictionary containing "time_series_features" and
      "aux_features", each of which is a dictionary of tf.Placeholders of
      features from the input configuration. All features have dtype float32
      and shape [batch_size, length].
  """
  batch_size = None  # Batch size will be dynamically specified.
  features = {"time_series_features": {}, "aux_features": {}}
  for feature_name, feature_spec in config.items():
    placeholder = tf.placeholder(
        dtype=tf.float32,
        shape=[batch_size, feature_spec.length],
        name=feature_name)
    if feature_spec.is_time_series:
      features["time_series_features"][feature_name] = placeholder
    else:
      features["aux_features"][feature_name] = placeholder

  return features


def build_labels_placeholder():
  """Builds a tf.Placeholder op for feeding model labels.

  Returns:
    labels: An int64 tf.Placeholder with shape [batch_size].
  """
  batch_size = None  # Batch size will be dynamically specified.
  return tf.placeholder(dtype=tf.int64, shape=[batch_size], name="labels")
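
Taken together, build_feature_placeholders(), build_labels_placeholder(), and prepare_feed_dict() cover the placeholder-feeding path end to end. Below is a minimal usage sketch, not part of the module itself: the feature configuration, the _FakeModel wrapper, and the numpy inputs are illustrative assumptions standing in for a real AstroModel.

import numpy as np
import tensorflow as tf

from astronet.ops import input_ops
from tf_util import configdict

# Hypothetical feature configuration: one time-series and one aux feature.
config = configdict.ConfigDict({
    "light_curve": {"length": 4, "is_time_series": True},
    "star_radius": {"length": 1, "is_time_series": False},
})
features = input_ops.build_feature_placeholders(config)
labels = input_ops.build_labels_placeholder()


class _FakeModel(object):
  """Illustrative stand-in exposing the attributes prepare_feed_dict() reads."""

  def __init__(self):
    self.time_series_features = features["time_series_features"]
    self.aux_features = features["aux_features"]
    self.labels = labels
    self.is_training = tf.placeholder(tf.bool, [], name="is_training")


model = _FakeModel()
batch = {
    "time_series_features": {
        "light_curve": np.random.rand(2, 4).astype(np.float32)
    },
    "aux_features": {
        "star_radius": np.random.rand(2, 1).astype(np.float32)
    },
}
feed_dict = input_ops.prepare_feed_dict(
    model, batch, labels=np.array([0, 1]), is_training=False)

with tf.Session() as sess:
  values = sess.run(features["time_series_features"]["light_curve"],
                    feed_dict=feed_dict)
  print(values.shape)  # (2, 4)
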
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for input_ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from astronet.ops import input_ops
from tf_util import configdict


class InputOpsTest(tf.test.TestCase):

  def assertFeatureShapesEqual(self, expected_shapes, features):
    """Asserts that a dict of feature placeholders has the expected shapes.

    Args:
      expected_shapes: Dictionary of expected Tensor shapes, as lists,
        corresponding to the structure of 'features'.
      features: Dictionary of feature placeholders of the format returned by
        input_ops.build_feature_placeholders().
    """
    actual_shapes = {}
    for feature_type in features:
      actual_shapes[feature_type] = {
          feature: tensor.shape.as_list()
          for feature, tensor in features[feature_type].items()
      }
    self.assertDictEqual(expected_shapes, actual_shapes)

  def testBuildFeaturePlaceholders(self):
    # One time series feature.
    config = configdict.ConfigDict({
        "time_feature_1": {
            "length": 14,
            "is_time_series": True,
        }
    })
    expected_shapes = {
        "time_series_features": {
            "time_feature_1": [None, 14],
        },
        "aux_features": {}
    }
    features = input_ops.build_feature_placeholders(config)
    self.assertFeatureShapesEqual(expected_shapes, features)

    # Two time series features.
    config = configdict.ConfigDict({
        "time_feature_1": {
            "length": 14,
            "is_time_series": True,
        },
        "time_feature_2": {
            "length": 5,
            "is_time_series": True,
        }
    })
    expected_shapes = {
        "time_series_features": {
            "time_feature_1": [None, 14],
            "time_feature_2": [None, 5],
        },
        "aux_features": {}
    }
    features = input_ops.build_feature_placeholders(config)
    self.assertFeatureShapesEqual(expected_shapes, features)

    # One aux feature.
    config = configdict.ConfigDict({
        "time_feature_1": {
            "length": 14,
            "is_time_series": True,
        },
        "aux_feature_1": {
            "length": 1,
            "is_time_series": False,
        }
    })
    expected_shapes = {
        "time_series_features": {
            "time_feature_1": [None, 14],
        },
        "aux_features": {
            "aux_feature_1": [None, 1]
        }
    }
    features = input_ops.build_feature_placeholders(config)
    self.assertFeatureShapesEqual(expected_shapes, features)

    # Two aux features.
    config = configdict.ConfigDict({
        "time_feature_1": {
            "length": 14,
            "is_time_series": True,
        },
        "aux_feature_1": {
            "length": 1,
            "is_time_series": False,
        },
        "aux_feature_2": {
            "length": 6,
            "is_time_series": False,
        },
    })
    expected_shapes = {
        "time_series_features": {
            "time_feature_1": [None, 14],
        },
        "aux_features": {
            "aux_feature_1": [None, 1],
            "aux_feature_2": [None, 6]
        }
    }
    features = input_ops.build_feature_placeholders(config)
    self.assertFeatureShapesEqual(expected_shapes, features)


if __name__ == "__main__":
  tf.test.main()
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Functions for computing evaluation metrics."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def _metric_variable(name, shape, dtype):
  """Creates a Variable in LOCAL_VARIABLES and METRIC_VARIABLES collections."""
  return tf.get_variable(
      name,
      initializer=tf.zeros(shape, dtype),
      trainable=False,
      collections=[tf.GraphKeys.LOCAL_VARIABLES, tf.GraphKeys.METRIC_VARIABLES])


def _build_metrics(labels, predictions, weights, batch_losses, output_dim=1):
  """Builds TensorFlow operations to compute model evaluation metrics.

  Args:
    labels: Tensor with shape [batch_size].
    predictions: Tensor with shape [batch_size, output_dim].
    weights: Tensor with shape [batch_size].
    batch_losses: Tensor with shape [batch_size].
    output_dim: Dimension of the model output.

  Returns:
    A dictionary {metric_name: (metric_value, update_op)}.
  """
  # Compute the predicted labels.
  assert len(predictions.shape) == 2
  binary_classification = output_dim == 1
  if binary_classification:
    assert predictions.shape[1] == 1
    predictions = tf.squeeze(predictions, axis=[1])
    predicted_labels = tf.to_int32(
        tf.greater(predictions, 0.5), name="predicted_labels")
  else:
    predicted_labels = tf.argmax(
        predictions, 1, name="predicted_labels", output_type=tf.int32)

  metrics = {}
  with tf.variable_scope("metrics"):
    # Total number of examples.
    num_examples = _metric_variable("num_examples", [], tf.float32)
    update_num_examples = tf.assign_add(num_examples, tf.reduce_sum(weights))
    metrics["num_examples"] = (num_examples.read_value(), update_num_examples)

    # Accuracy metrics.
    num_correct = _metric_variable("num_correct", [], tf.float32)
    is_correct = weights * tf.to_float(tf.equal(labels, predicted_labels))
    update_num_correct = tf.assign_add(num_correct, tf.reduce_sum(is_correct))
    metrics["accuracy/num_correct"] = (num_correct.read_value(),
                                       update_num_correct)
    accuracy = tf.div(num_correct, num_examples, name="accuracy")
    metrics["accuracy/accuracy"] = (accuracy, tf.no_op())

    # Weighted cross-entropy loss.
    metrics["losses/weighted_cross_entropy"] = tf.metrics.mean(
        batch_losses, weights=weights, name="cross_entropy_loss")

    def _count_condition(name, labels_value, predicted_value):
      """Creates a counter for given values of predictions and labels."""
      count = _metric_variable(name, [], tf.float32)
      is_equal = tf.to_float(
          tf.logical_and(
              tf.equal(labels, labels_value),
              tf.equal(predicted_labels, predicted_value)))
      update_op = tf.assign_add(count, tf.reduce_sum(weights * is_equal))
      return count.read_value(), update_op

    # Confusion matrix metrics.
    num_labels = 2 if binary_classification else output_dim
    for gold_label in range(num_labels):
      for pred_label in range(num_labels):
        metric_name = "confusion_matrix/label_{}_pred_{}".format(
            gold_label, pred_label)
        metrics[metric_name] = _count_condition(
            metric_name, labels_value=gold_label, predicted_value=pred_label)

    # Possibly create AUC metric for binary classification.
    if binary_classification:
      labels = tf.cast(labels, dtype=tf.bool)
      metrics["auc"] = tf.metrics.auc(
          labels, predictions, weights=weights, num_thresholds=1000)

  return metrics


def create_metric_fn(model):
  """Creates a tuple (metric_fn, metric_fn_inputs).

  This function is primarily used for creating a TPUEstimator.

  The result of calling metric_fn(**metric_fn_inputs) is a dictionary
  {metric_name: (metric_value, update_op)}.

  Args:
    model: Instance of AstroModel.

  Returns:
    A tuple (metric_fn, metric_fn_inputs).
  """
  weights = model.weights
  if weights is None:
    weights = tf.ones_like(model.labels, dtype=tf.float32)

  metric_fn_inputs = {
      "labels": model.labels,
      "predictions": model.predictions,
      "weights": weights,
      "batch_losses": model.batch_losses,
  }

  def metric_fn(labels, predictions, weights, batch_losses):
    return _build_metrics(
        labels,
        predictions,
        weights,
        batch_losses,
        output_dim=model.hparams.output_dim)

  return metric_fn, metric_fn_inputs


def create_metrics(model):
  """Creates a dictionary {metric_name: (metric_value, update_op)}.

  This function is primarily used for creating an Estimator.

  Args:
    model: Instance of AstroModel.

  Returns:
    A dictionary {metric_name: (metric_value, update_op)}.
  """
  metric_fn, metric_fn_inputs = create_metric_fn(model)
  return metric_fn(**metric_fn_inputs)
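
As the docstrings note, create_metrics() targets the Estimator API and create_metric_fn() the TPUEstimator API. The helper below is only a sketch of that wiring, under the assumptions that the built model exposes a total_loss tensor alongside the attributes read by create_metric_fn(), and that TF 1.x's tf.contrib.tpu is available; it is not part of this module.

import tensorflow as tf

from astronet.ops import metrics


def make_eval_spec(model, mode, use_tpu=False):
  """Illustrative only: wraps a built model's metrics into an eval spec."""
  if use_tpu:
    # TPUEstimator takes the (metric_fn, metric_fn_inputs) pair so the metrics
    # can be evaluated outside the TPU loop.
    metric_fn, metric_fn_inputs = metrics.create_metric_fn(model)
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=model.total_loss,  # Assumed attribute of the model.
        eval_metrics=(metric_fn, metric_fn_inputs))

  # Estimator takes the already-built {name: (value_op, update_op)} dictionary.
  return tf.estimator.EstimatorSpec(
      mode=mode,
      loss=model.total_loss,  # Assumed attribute of the model.
      eval_metric_ops=metrics.create_metrics(model))
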
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for metrics.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from astronet.ops import metrics


def _unpack_metric_map(names_to_tuples):
  """Unpacks {metric_name: (metric_value, update_op)} into separate dicts."""
  metric_names = names_to_tuples.keys()
  value_ops, update_ops = zip(*names_to_tuples.values())
  return dict(zip(metric_names, value_ops)), dict(zip(metric_names, update_ops))


class _MockHparams(object):
  """Mock Hparams class to support accessing with dot notation."""
  pass


class _MockModel(object):
  """Mock model for testing."""

  def __init__(self, labels, predictions, weights, batch_losses, output_dim):
    self.labels = tf.constant(labels, dtype=tf.int32)
    self.predictions = tf.constant(predictions, dtype=tf.float32)
    self.weights = None if weights is None else tf.constant(
        weights, dtype=tf.float32)
    self.batch_losses = tf.constant(batch_losses, dtype=tf.float32)
    self.hparams = _MockHparams()
    self.hparams.output_dim = output_dim


class MetricsTest(tf.test.TestCase):

  def testMultiClassificationWithoutWeights(self):
    labels = [0, 1, 2, 3]
    predictions = [
        [0.7, 0.2, 0.1, 0.0],  # Predicted label = 0
        [0.2, 0.4, 0.2, 0.2],  # Predicted label = 1
        [0.0, 0.0, 0.0, 1.0],  # Predicted label = 3
        [0.1, 0.1, 0.7, 0.1],  # Predicted label = 2
    ]
    weights = None
    batch_losses = [0, 0, 4, 2]
    model = _MockModel(labels, predictions, weights, batch_losses, output_dim=4)

    metric_map = metrics.create_metrics(model)
    value_ops, update_ops = _unpack_metric_map(metric_map)
    initializer = tf.local_variables_initializer()

    with self.test_session() as sess:
      sess.run(initializer)

      sess.run(update_ops)
      self.assertAllClose({
          "num_examples": 4,
          "accuracy/num_correct": 2,
          "accuracy/accuracy": 0.5,
          "losses/weighted_cross_entropy": 1.5,
          "confusion_matrix/label_0_pred_0": 1,
          "confusion_matrix/label_0_pred_1": 0,
          "confusion_matrix/label_0_pred_2": 0,
          "confusion_matrix/label_0_pred_3": 0,
          "confusion_matrix/label_1_pred_0": 0,
          "confusion_matrix/label_1_pred_1": 1,
          "confusion_matrix/label_1_pred_2": 0,
          "confusion_matrix/label_1_pred_3": 0,
          "confusion_matrix/label_2_pred_0": 0,
          "confusion_matrix/label_2_pred_1": 0,
          "confusion_matrix/label_2_pred_2": 0,
          "confusion_matrix/label_2_pred_3": 1,
          "confusion_matrix/label_3_pred_0": 0,
          "confusion_matrix/label_3_pred_1": 0,
          "confusion_matrix/label_3_pred_2": 1,
          "confusion_matrix/label_3_pred_3": 0
      }, sess.run(value_ops))

      sess.run(update_ops)
      self.assertAllClose({
          "num_examples": 8,
          "accuracy/num_correct": 4,
          "accuracy/accuracy": 0.5,
          "losses/weighted_cross_entropy": 1.5,
          "confusion_matrix/label_0_pred_0": 2,
          "confusion_matrix/label_0_pred_1": 0,
          "confusion_matrix/label_0_pred_2": 0,
          "confusion_matrix/label_0_pred_3": 0,
          "confusion_matrix/label_1_pred_0": 0,
          "confusion_matrix/label_1_pred_1": 2,
          "confusion_matrix/label_1_pred_2": 0,
          "confusion_matrix/label_1_pred_3": 0,
          "confusion_matrix/label_2_pred_0": 0,
          "confusion_matrix/label_2_pred_1": 0,
          "confusion_matrix/label_2_pred_2": 0,
          "confusion_matrix/label_2_pred_3": 2,
          "confusion_matrix/label_3_pred_0": 0,
          "confusion_matrix/label_3_pred_1": 0,
          "confusion_matrix/label_3_pred_2": 2,
          "confusion_matrix/label_3_pred_3": 0
      }, sess.run(value_ops))

  def testMultiClassificationWithWeights(self):
    labels = [0, 1, 2, 3]
    predictions = [
        [0.7, 0.2, 0.1, 0.0],  # Predicted label = 0
        [0.2, 0.4, 0.2, 0.2],  # Predicted label = 1
        [0.0, 0.0, 0.0, 1.0],  # Predicted label = 3
        [0.1, 0.1, 0.7, 0.1],  # Predicted label = 2
    ]
    weights = [0, 1, 0, 1]
    batch_losses = [0, 0, 4, 2]
    model = _MockModel(labels, predictions, weights, batch_losses, output_dim=4)

    metric_map = metrics.create_metrics(model)
    value_ops, update_ops = _unpack_metric_map(metric_map)
    initializer = tf.local_variables_initializer()

    with self.test_session() as sess:
      sess.run(initializer)

      sess.run(update_ops)
      self.assertAllClose({
          "num_examples": 2,
          "accuracy/num_correct": 1,
          "accuracy/accuracy": 0.5,
          "losses/weighted_cross_entropy": 1,
          "confusion_matrix/label_0_pred_0": 0,
          "confusion_matrix/label_0_pred_1": 0,
          "confusion_matrix/label_0_pred_2": 0,
          "confusion_matrix/label_0_pred_3": 0,
          "confusion_matrix/label_1_pred_0": 0,
          "confusion_matrix/label_1_pred_1": 1,
          "confusion_matrix/label_1_pred_2": 0,
          "confusion_matrix/label_1_pred_3": 0,
          "confusion_matrix/label_2_pred_0": 0,
          "confusion_matrix/label_2_pred_1": 0,
          "confusion_matrix/label_2_pred_2": 0,
          "confusion_matrix/label_2_pred_3": 0,
          "confusion_matrix/label_3_pred_0": 0,
          "confusion_matrix/label_3_pred_1": 0,
          "confusion_matrix/label_3_pred_2": 1,
          "confusion_matrix/label_3_pred_3": 0
      }, sess.run(value_ops))

      sess.run(update_ops)
      self.assertAllClose({
          "num_examples": 4,
          "accuracy/num_correct": 2,
          "accuracy/accuracy": 0.5,
          "losses/weighted_cross_entropy": 1,
          "confusion_matrix/label_0_pred_0": 0,
          "confusion_matrix/label_0_pred_1": 0,
          "confusion_matrix/label_0_pred_2": 0,
          "confusion_matrix/label_0_pred_3": 0,
          "confusion_matrix/label_1_pred_0": 0,
          "confusion_matrix/label_1_pred_1": 2,
          "confusion_matrix/label_1_pred_2": 0,
          "confusion_matrix/label_1_pred_3": 0,
          "confusion_matrix/label_2_pred_0": 0,
          "confusion_matrix/label_2_pred_1": 0,
          "confusion_matrix/label_2_pred_2": 0,
          "confusion_matrix/label_2_pred_3": 0,
          "confusion_matrix/label_3_pred_0": 0,
          "confusion_matrix/label_3_pred_1": 0,
          "confusion_matrix/label_3_pred_2": 2,
          "confusion_matrix/label_3_pred_3": 0
      }, sess.run(value_ops))

  def testBinaryClassificationWithoutWeights(self):
    labels = [0, 1, 1, 0]
    predictions = [
        [0.4],  # Predicted label = 0
        [0.6],  # Predicted label = 1
        [0.0],  # Predicted label = 0
        [1.0],  # Predicted label = 1
    ]
    weights = None
    batch_losses = [0, 0, 4, 2]
    model = _MockModel(labels, predictions, weights, batch_losses, output_dim=1)

    metric_map = metrics.create_metrics(model)
    value_ops, update_ops = _unpack_metric_map(metric_map)
    initializer = tf.local_variables_initializer()

    with self.test_session() as sess:
      sess.run(initializer)

      sess.run(update_ops)
      self.assertAllClose({
          "num_examples": 4,
          "accuracy/num_correct": 2,
          "accuracy/accuracy": 0.5,
          "losses/weighted_cross_entropy": 1.5,
          "auc": 0.25,
          "confusion_matrix/label_0_pred_0": 1,
          "confusion_matrix/label_0_pred_1": 1,
          "confusion_matrix/label_1_pred_0": 1,
          "confusion_matrix/label_1_pred_1": 1,
      }, sess.run(value_ops))

      sess.run(update_ops)
      self.assertAllClose({
          "num_examples": 8,
          "accuracy/num_correct": 4,
          "accuracy/accuracy": 0.5,
          "losses/weighted_cross_entropy": 1.5,
          "auc": 0.25,
          "confusion_matrix/label_0_pred_0": 2,
          "confusion_matrix/label_0_pred_1": 2,
          "confusion_matrix/label_1_pred_0": 2,
          "confusion_matrix/label_1_pred_1": 2,
      }, sess.run(value_ops))

  def testBinaryClassificationWithWeights(self):
    labels = [0, 1, 1, 0]
    predictions = [
        [0.4],  # Predicted label = 0
        [0.6],  # Predicted label = 1
        [0.0],  # Predicted label = 0
        [1.0],  # Predicted label = 1
    ]
    weights = [0, 1, 0, 1]
    batch_losses = [0, 0, 4, 2]
    model = _MockModel(labels, predictions, weights, batch_losses, output_dim=1)

    metric_map = metrics.create_metrics(model)
    value_ops, update_ops = _unpack_metric_map(metric_map)
    initializer = tf.local_variables_initializer()

    with self.test_session() as sess:
      sess.run(initializer)

      sess.run(update_ops)
      self.assertAllClose({
          "num_examples": 2,
          "accuracy/num_correct": 1,
          "accuracy/accuracy": 0.5,
          "losses/weighted_cross_entropy": 1,
          "auc": 0,
          "confusion_matrix/label_0_pred_0": 0,
          "confusion_matrix/label_0_pred_1": 1,
          "confusion_matrix/label_1_pred_0": 0,
          "confusion_matrix/label_1_pred_1": 1,
      }, sess.run(value_ops))

      sess.run(update_ops)
      self.assertAllClose({
          "num_examples": 4,
          "accuracy/num_correct": 2,
          "accuracy/accuracy": 0.5,
          "losses/weighted_cross_entropy": 1,
          "auc": 0,
          "confusion_matrix/label_0_pred_0": 0,
          "confusion_matrix/label_0_pred_1": 2,
          "confusion_matrix/label_1_pred_0": 0,
          "confusion_matrix/label_1_pred_1": 2,
      }, sess.run(value_ops))


if __name__ == "__main__":
  tf.test.main()
package(default_visibility = ["//visibility:public"])

licenses(["notice"])  # Apache 2.0

py_library(
    name = "estimator_util",
    srcs = ["estimator_util.py"],
    srcs_version = "PY2AND3",
    deps = [
        "//astronet/ops:dataset_ops",
        "//astronet/ops:metrics",
        "//astronet/ops:training",
    ],
)
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.