Unverified commit d9266ae6, authored by Chris Shallue, committed by GitHub

Merge pull request #3622 from cshallue/py3

Python3 compatibility for AstroNet
parents f85ab4c8 14b8bf25
@@ -239,7 +239,7 @@ bazel build astronet/...
 TFRECORD_DIR="${HOME}/astronet/tfrecord"

 # Preprocess light curves into sharded TFRecord files using 5 worker processes.
-bazel-bin/tensorflow/data/generate_input_records \
+bazel-bin/astronet/data/generate_input_records \
   --input_tce_csv_file=${TCE_CSV_FILE} \
   --kepler_data_dir=${KEPLER_DATA_DIR} \
   --output_dir=${TFRECORD_DIR} \

@@ -130,7 +130,7 @@ class AstroCNNModel(astro_model.AstroModel):
       self.time_series_hidden_layers
     """
     time_series_hidden_layers = {}
-    for name, time_series in self.time_series_features.iteritems():
+    for name, time_series in self.time_series_features.items():
       time_series_hidden_layers[name] = self._build_cnn_layers(
           inputs=time_series,
           hparams=self.hparams.time_series_hidden[name],

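Note on the change that recurs throughout this merge: `dict.iteritems()` no longer exists in Python 3, while `.items()` works on both interpreters (a list copy on Python 2, a lazy view on Python 3). A minimal sketch of the pattern; the dictionary contents are illustrative:

```python
from __future__ import print_function
from six import iteritems

d = {"global_view": 2001, "local_view": 201}  # illustrative feature lengths

for name, length in d.items():  # works unchanged on Python 2 and 3
  print(name, length)

for name, length in iteritems(d):  # lazy on both, if the Py2 copy cost matters
  print(name, length)
```
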
@@ -151,7 +151,7 @@ class AstroFCModel(astro_model.AstroModel):
       self.time_series_hidden_layers
     """
     time_series_hidden_layers = {}
-    for name, time_series in self.time_series_features.iteritems():
+    for name, time_series in self.time_series_features.items():
       time_series_hidden_layers[name] = self._build_local_fc_layers(
           inputs=time_series,
           hparams=self.hparams.time_series_hidden[name],

@@ -140,7 +140,8 @@ def _set_float_feature(ex, name, value):
 def _set_bytes_feature(ex, name, value):
   """Sets the value of a bytes feature in a tensorflow.train.Example proto."""
   assert name not in ex.features.feature, "Duplicate feature: %s" % name
-  ex.features.feature[name].bytes_list.value.extend([str(v) for v in value])
+  ex.features.feature[name].bytes_list.value.extend([
+      str(v).encode("latin-1") for v in value])


 def _set_int64_feature(ex, name, value):

@@ -180,14 +181,14 @@ def _process_tce(tce):
   _set_float_feature(ex, "local_view", local_view)

   # Set other columns.
-  for col_name, value in tce.iteritems():
+  for col_name, value in tce.items():
     if np.issubdtype(type(value), np.integer):
       _set_int64_feature(ex, col_name, [value])
     else:
       try:
         _set_float_feature(ex, col_name, [float(value)])
       except ValueError:
-        _set_bytes_feature(ex, col_name, [str(value)])
+        _set_bytes_feature(ex, col_name, [value])

   return ex

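Why the `encode("latin-1")` above: in Python 3, `str(v)` produces unicode text, but `tf.train.BytesList` only accepts `bytes`, so each value must be encoded before being stored. Latin-1 maps code points 0-255 one-to-one onto bytes, so any 8-bit payload round-trips unchanged. A standalone sketch; the feature name and values are illustrative:

```python
import tensorflow as tf

ex = tf.train.Example()
values = ["UNK", "PC"]  # hypothetical string column values
# Python 3 str is unicode; BytesList refuses it, so encode each value.
ex.features.feature["disposition"].bytes_list.value.extend(
    [str(v).encode("latin-1") for v in values])
print(ex)
```
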
@@ -60,7 +60,7 @@ def _recursive_pad_to_batch_size(tensor_or_collection, batch_size):
   if isinstance(tensor_or_collection, dict):
     return {
         name: _recursive_pad_to_batch_size(t, batch_size)
-        for name, t in tensor_or_collection.iteritems()
+        for name, t in tensor_or_collection.items()
     }

   if isinstance(tensor_or_collection, collections.Iterable):

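One context line worth flagging for the future: `collections.Iterable` still resolves on Python 3 up to 3.9 (with a deprecation warning since 3.3), but the ABCs live in `collections.abc`, and the old alias was removed in Python 3.10. A version-agnostic guard, as a sketch:

```python
try:
  from collections.abc import Iterable  # Python 3.3+
except ImportError:
  from collections import Iterable  # Python 2 fallback
```
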
@@ -185,8 +185,8 @@ def build_dataset(file_pattern,
   # Create a HashTable mapping label strings to integer ids.
   table_initializer = tf.contrib.lookup.KeyValueTensorInitializer(
-      keys=input_config.label_map.keys(),
-      values=input_config.label_map.values(),
+      keys=list(input_config.label_map.keys()),
+      values=list(input_config.label_map.values()),
       key_dtype=tf.string,
       value_dtype=tf.int32)
   label_to_id = tf.contrib.lookup.HashTable(

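The `list(...)` wrappers are needed because Python 3's `dict.keys()` and `dict.values()` return view objects, which TF's tensor conversion does not accept the way it accepts the Python 2 lists. Materializing them as lists restores the old behavior; `keys()` and `values()` are guaranteed to iterate in matching order. A sketch with an illustrative label map:

```python
label_map = {"PC": 1, "AFP": 0, "NTP": 0}  # illustrative label map
keys = list(label_map.keys())      # Py3 view -> concrete list
values = list(label_map.values())  # iteration order matches keys()
```
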
@@ -197,7 +197,7 @@ def build_dataset(file_pattern,
   # Set specifications for parsing the features.
   data_fields = {
       feature_name: tf.FixedLenFeature([feature.length], tf.float32)
-      for feature_name, feature in input_config.features.iteritems()
+      for feature_name, feature in input_config.features.items()
   }
   if include_labels:
     data_fields[input_config.label_feature] = tf.FixedLenFeature([],

@@ -217,7 +217,7 @@ def build_dataset(file_pattern,
   # Reorganize outputs.
   output = {}
-  for feature_name, value in parsed_features.iteritems():
+  for feature_name, value in parsed_features.items():
     if include_labels and feature_name == input_config.label_feature:
       label_id = label_to_id.lookup(value)
       # Ensure that the label_id is nonnegative to verify a successful hash

@@ -37,9 +37,9 @@ def prepare_feed_dict(model, features, labels=None, is_training=None):
     feed_dict: A dictionary of input Tensor to numpy array.
   """
   feed_dict = {}
-  for feature, tensor in model.time_series_features.iteritems():
+  for feature, tensor in model.time_series_features.items():
     feed_dict[tensor] = features["time_series_features"][feature]
-  for feature, tensor in model.aux_features.iteritems():
+  for feature, tensor in model.aux_features.items():
     feed_dict[tensor] = features["aux_features"][feature]

   if labels is not None:

@@ -65,7 +65,7 @@ def build_feature_placeholders(config):
   """
   batch_size = None  # Batch size will be dynamically specified.
   features = {"time_series_features": {}, "aux_features": {}}
-  for feature_name, feature_spec in config.iteritems():
+  for feature_name, feature_spec in config.items():
     placeholder = tf.placeholder(
         dtype=tf.float32,
         shape=[batch_size, feature_spec.length],

@@ -39,7 +39,7 @@ class InputOpsTest(tf.test.TestCase):
     for feature_type in features:
       actual_shapes[feature_type] = {
           feature: tensor.shape.as_list()
-          for feature, tensor in features[feature_type].iteritems()
+          for feature, tensor in features[feature_type].items()
       }

     self.assertDictEqual(expected_shapes, actual_shapes)

@@ -50,11 +50,11 @@ def fake_features(feature_spec, batch_size):
   features = {}
   features["time_series_features"] = {
       name: np.random.random([batch_size, spec["length"]])
-      for name, spec in feature_spec.iteritems() if spec["is_time_series"]
+      for name, spec in feature_spec.items() if spec["is_time_series"]
   }
   features["aux_features"] = {
       name: np.random.random([batch_size, spec["length"]])
-      for name, spec in feature_spec.iteritems() if not spec["is_time_series"]
+      for name, spec in feature_spec.items() if not spec["is_time_series"]
   }
   return features

@@ -110,7 +110,7 @@ def unflatten(flat_config):
     A dictionary nested according to the keys of the input dictionary.
   """
   config = {}
-  for path, value in flat_config.iteritems():
+  for path, value in flat_config.items():
     path = path.split(".")
     final_key = path.pop()
     nested_config = config

@@ -41,7 +41,7 @@ class ConfigDict(dict):
         parameters.
     """
     if initial_dictionary:
-      for field, value in initial_dictionary.iteritems():
+      for field, value in initial_dictionary.items():
         initial_dictionary[field] = _maybe_convert_dict(value)

     super(ConfigDict, self).__init__(initial_dictionary)

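A subtlety in this hunk: the loop writes back into `initial_dictionary` while iterating over its `.items()` view. That is safe here because it only replaces values of existing keys; Python 3 dict views raise `RuntimeError` only when iteration adds or removes keys. A sketch of the distinction:

```python
d = {"a": 1, "b": 2}
for k, v in d.items():
  d[k] = v * 10  # safe: values replaced, key set unchanged

try:
  for k, v in d.items():
    d[k + "_new"] = v  # grows the dict mid-iteration
except RuntimeError as err:
  print(err)  # "dictionary changed size during iteration"
```
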
@@ -69,15 +69,7 @@ def create_input_fn(file_pattern,
         repeat=repeat,
         use_tpu=use_tpu)

-    # We must use an initializable iterator, rather than a one-shot iterator,
-    # because the input pipeline contains a stateful table that requires
-    # initialization. We add the initializer to the TABLE_INITIALIZERS
-    # collection to ensure it is run during initialization.
-    iterator = dataset.make_initializable_iterator()
-    tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
-
-    inputs = iterator.get_next()
-    return inputs, inputs.pop("labels", None)
+    return dataset

   return input_fn

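Returning the `tf.data.Dataset` itself works because recent TF 1.x `tf.estimator.Estimator` versions accept a dataset from `input_fn` and create and initialize the iterator internally, and the default `Scaffold` runs `tf.tables_initializer()`, so the hand-rolled initializable iterator is no longer needed. A self-contained toy sketch of the slimmed-down shape (data values are illustrative):

```python
import tensorflow as tf

def input_fn(params):
  """Toy input_fn: returning the Dataset lets the Estimator create and
  initialize the iterator (and any lookup tables) itself."""
  dataset = tf.data.Dataset.from_tensor_slices(
      {"x": [[1.0], [2.0], [3.0]], "labels": [0, 1, 1]})
  return dataset.batch(params["batch_size"])
```
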
@@ -103,6 +95,14 @@ def create_model_fn(model_class, hparams, use_tpu=False):
     if "batch_size" in params:
       hparams.batch_size = params["batch_size"]

+    # Allow labels to be passed in the features dictionary.
+    if "labels" in features:
+      if labels is not None and labels is not features["labels"]:
+        raise ValueError(
+            "Conflicting labels: features['labels'] = %s, labels = %s" %
+            (features["labels"], labels))
+      labels = features.pop("labels")
+
     model = model_class(features, labels, hparams, mode)
     model.build()

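This goes hand in hand with the `input_fn` change above: with the pipeline now yielding a single features dict, labels ride inside it under a `"labels"` key, and `model_fn` pops them back out, erroring only if a conflicting `labels` argument also arrives. A toy illustration of the guard (values are illustrative):

```python
features = {"global_view": [0.1, 0.2], "labels": [1, 0]}
labels = None  # the Estimator passed no separate labels tensor

if "labels" in features:
  if labels is not None and labels is not features["labels"]:
    raise ValueError("Conflicting labels: %s vs. %s"
                     % (features["labels"], labels))
  labels = features.pop("labels")

assert "labels" not in features and labels == [1, 0]
```
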
@@ -160,7 +160,7 @@ def read_kepler_light_curve(filenames,
   all_flux = []

   for filename in filenames:
-    with fits.open(open(filename, "r")) as hdu_list:
+    with fits.open(open(filename, "rb")) as hdu_list:
       light_curve = hdu_list[light_curve_extension].data
       time = light_curve.TIME
       flux = light_curve.PDCSAP_FLUX

@@ -19,7 +19,6 @@ from __future__ import division
 from __future__ import print_function

 import collections
-import itertools

 import numpy as np
 from six.moves import range  # pylint:disable=redefined-builtin

@@ -72,7 +71,7 @@ def split(all_time, all_flux, gap_width=0.75):
   out_time = []
   out_flux = []
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
     start = 0
     for end in range(1, len(time) + 1):
       # Choose the largest endpoint such that time[start:end] has no gaps.

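`itertools.izip` is Python 2 only; its lazy behavior is exactly what the builtin `zip` does in Python 3. On Python 2 the builtin `zip` materializes a list instead, which is harmless at these sizes; code that needs lazy zipping on both interpreters can import it from `six.moves` (this file already uses `six.moves.range`). A sketch with illustrative data:

```python
from __future__ import print_function
from six.moves import zip  # pylint:disable=redefined-builtin

all_time = [[0.5, 1.0], [2.5, 3.0]]
all_flux = [[1.00, 0.99], [1.01, 1.00]]
for time, flux in zip(all_time, all_flux):  # lazy on both interpreters
  print(len(time), len(flux))
```
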
@@ -117,7 +116,7 @@ def remove_events(all_time, all_flux, events, width_factor=1.0):
   output_time = []
   output_flux = []
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
     mask = np.ones_like(time, dtype=np.bool)
     for event in events:
       transit_dist = np.abs(phase_fold_time(time, event.period, event.t0))

@@ -149,7 +148,7 @@ def interpolate_masked_spline(all_time, all_masked_time, all_masked_spline):
       points linearly interpolated.
   """
   interp_spline = []
-  for time, masked_time, masked_spline in itertools.izip(
+  for time, masked_time, masked_spline in zip(
       all_time, all_masked_time, all_masked_spline):
     if len(masked_time) > 0:  # pylint:disable=g-explicit-length-test
       interp_spline.append(np.interp(time, masked_time, masked_spline))

@@ -4,7 +4,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import itertools
 import warnings

 import numpy as np

@@ -149,7 +148,7 @@ def choose_kepler_spline(all_time,
   spline = []
   spline_mask = []
   bad_bkspace = False  # Indicates that the current bkspace should be skipped.
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
     # Don't fit a spline on less than 4 points.
     if len(time) < 4:
       spline.append(flux)