Unverified commit d9266ae6, authored by Chris Shallue, committed by GitHub

Merge pull request #3622 from cshallue/py3

Python3 compatibility for AstroNet
parents f85ab4c8 14b8bf25
@@ -239,7 +239,7 @@ bazel build astronet/...
 TFRECORD_DIR="${HOME}/astronet/tfrecord"

 # Preprocess light curves into sharded TFRecord files using 5 worker processes.
-bazel-bin/tensorflow/data/generate_input_records \
+bazel-bin/astronet/data/generate_input_records \
   --input_tce_csv_file=${TCE_CSV_FILE} \
   --kepler_data_dir=${KEPLER_DATA_DIR} \
   --output_dir=${TFRECORD_DIR} \

@@ -130,7 +130,7 @@ class AstroCNNModel(astro_model.AstroModel):
       self.time_series_hidden_layers
     """
     time_series_hidden_layers = {}
-    for name, time_series in self.time_series_features.iteritems():
+    for name, time_series in self.time_series_features.items():
       time_series_hidden_layers[name] = self._build_cnn_layers(
           inputs=time_series,
           hparams=self.hparams.time_series_hidden[name],

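Note on the change that recurs throughout this merge: `dict.iteritems()` no longer exists in Python 3, while `.items()` works on both interpreters (a list copy on Python 2, a lazy view on Python 3). A minimal sketch of the pattern; the dictionary contents are illustrative:

```python
from __future__ import print_function
from six import iteritems

d = {"global_view": 2001, "local_view": 201}  # illustrative feature lengths

for name, length in d.items():  # works unchanged on Python 2 and 3
  print(name, length)

for name, length in iteritems(d):  # lazy on both, if the Py2 copy cost matters
  print(name, length)
```
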
@@ -151,7 +151,7 @@ class AstroFCModel(astro_model.AstroModel):
       self.time_series_hidden_layers
     """
     time_series_hidden_layers = {}
-    for name, time_series in self.time_series_features.iteritems():
+    for name, time_series in self.time_series_features.items():
       time_series_hidden_layers[name] = self._build_local_fc_layers(
           inputs=time_series,
           hparams=self.hparams.time_series_hidden[name],

@@ -140,7 +140,8 @@ def _set_float_feature(ex, name, value):
 def _set_bytes_feature(ex, name, value):
   """Sets the value of a bytes feature in a tensorflow.train.Example proto."""
   assert name not in ex.features.feature, "Duplicate feature: %s" % name
-  ex.features.feature[name].bytes_list.value.extend([str(v) for v in value])
+  ex.features.feature[name].bytes_list.value.extend([
+      str(v).encode("latin-1") for v in value])


 def _set_int64_feature(ex, name, value):

@@ -180,14 +181,14 @@ def _process_tce(tce):
   _set_float_feature(ex, "local_view", local_view)

   # Set other columns.
-  for col_name, value in tce.iteritems():
+  for col_name, value in tce.items():
     if np.issubdtype(type(value), np.integer):
       _set_int64_feature(ex, col_name, [value])
     else:
       try:
         _set_float_feature(ex, col_name, [float(value)])
       except ValueError:
-        _set_bytes_feature(ex, col_name, [str(value)])
+        _set_bytes_feature(ex, col_name, [value])

   return ex

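Why the `encode("latin-1")` above: in Python 3, `str(v)` produces unicode text, but `tf.train.BytesList` only accepts `bytes`, so each value must be encoded before being stored. Latin-1 maps code points 0-255 one-to-one onto bytes, so any 8-bit payload round-trips unchanged. A standalone sketch; the feature name and values are illustrative:

```python
import tensorflow as tf

ex = tf.train.Example()
values = ["UNK", "PC"]  # hypothetical string column values
# Python 3 str is unicode; BytesList refuses it, so encode each value.
ex.features.feature["disposition"].bytes_list.value.extend(
    [str(v).encode("latin-1") for v in values])
print(ex)
```
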
@@ -60,7 +60,7 @@ def _recursive_pad_to_batch_size(tensor_or_collection, batch_size):
   if isinstance(tensor_or_collection, dict):
     return {
         name: _recursive_pad_to_batch_size(t, batch_size)
-        for name, t in tensor_or_collection.iteritems()
+        for name, t in tensor_or_collection.items()
     }

   if isinstance(tensor_or_collection, collections.Iterable):

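One context line worth flagging for the future: `collections.Iterable` still resolves on Python 3 up to 3.9 (with a deprecation warning since 3.3), but the ABCs live in `collections.abc`, and the old alias was removed in Python 3.10. A version-agnostic guard, as a sketch:

```python
try:
  from collections.abc import Iterable  # Python 3.3+
except ImportError:
  from collections import Iterable  # Python 2 fallback
```
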
@@ -185,8 +185,8 @@ def build_dataset(file_pattern,
   # Create a HashTable mapping label strings to integer ids.
   table_initializer = tf.contrib.lookup.KeyValueTensorInitializer(
-      keys=input_config.label_map.keys(),
-      values=input_config.label_map.values(),
+      keys=list(input_config.label_map.keys()),
+      values=list(input_config.label_map.values()),
       key_dtype=tf.string,
       value_dtype=tf.int32)
   label_to_id = tf.contrib.lookup.HashTable(

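The `list(...)` wrappers are needed because Python 3's `dict.keys()` and `dict.values()` return view objects, which TF's tensor conversion does not accept the way it accepts the Python 2 lists. Materializing them as lists restores the old behavior; `keys()` and `values()` are guaranteed to iterate in matching order. A sketch with an illustrative label map:

```python
label_map = {"PC": 1, "AFP": 0, "NTP": 0}  # illustrative label map
keys = list(label_map.keys())      # Py3 view -> concrete list
values = list(label_map.values())  # iteration order matches keys()
```
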
@@ -197,7 +197,7 @@ def build_dataset(file_pattern,
   # Set specifications for parsing the features.
   data_fields = {
       feature_name: tf.FixedLenFeature([feature.length], tf.float32)
-      for feature_name, feature in input_config.features.iteritems()
+      for feature_name, feature in input_config.features.items()
   }
   if include_labels:
     data_fields[input_config.label_feature] = tf.FixedLenFeature([],

@@ -217,7 +217,7 @@ def build_dataset(file_pattern,
   # Reorganize outputs.
   output = {}
-  for feature_name, value in parsed_features.iteritems():
+  for feature_name, value in parsed_features.items():
     if include_labels and feature_name == input_config.label_feature:
       label_id = label_to_id.lookup(value)
       # Ensure that the label_id is nonnegative to verify a successful hash

@@ -37,9 +37,9 @@ def prepare_feed_dict(model, features, labels=None, is_training=None):
     feed_dict: A dictionary of input Tensor to numpy array.
   """
   feed_dict = {}
-  for feature, tensor in model.time_series_features.iteritems():
+  for feature, tensor in model.time_series_features.items():
     feed_dict[tensor] = features["time_series_features"][feature]
-  for feature, tensor in model.aux_features.iteritems():
+  for feature, tensor in model.aux_features.items():
     feed_dict[tensor] = features["aux_features"][feature]

   if labels is not None:

@@ -65,7 +65,7 @@ def build_feature_placeholders(config):
   """
   batch_size = None  # Batch size will be dynamically specified.
   features = {"time_series_features": {}, "aux_features": {}}
-  for feature_name, feature_spec in config.iteritems():
+  for feature_name, feature_spec in config.items():
     placeholder = tf.placeholder(
         dtype=tf.float32,
         shape=[batch_size, feature_spec.length],

@@ -39,7 +39,7 @@ class InputOpsTest(tf.test.TestCase):
     for feature_type in features:
       actual_shapes[feature_type] = {
           feature: tensor.shape.as_list()
-          for feature, tensor in features[feature_type].iteritems()
+          for feature, tensor in features[feature_type].items()
       }

     self.assertDictEqual(expected_shapes, actual_shapes)

@@ -50,11 +50,11 @@ def fake_features(feature_spec, batch_size):
   features = {}
   features["time_series_features"] = {
       name: np.random.random([batch_size, spec["length"]])
-      for name, spec in feature_spec.iteritems() if spec["is_time_series"]
+      for name, spec in feature_spec.items() if spec["is_time_series"]
   }
   features["aux_features"] = {
       name: np.random.random([batch_size, spec["length"]])
-      for name, spec in feature_spec.iteritems() if not spec["is_time_series"]
+      for name, spec in feature_spec.items() if not spec["is_time_series"]
   }
   return features

@@ -110,7 +110,7 @@ def unflatten(flat_config):
     A dictionary nested according to the keys of the input dictionary.
   """
   config = {}
-  for path, value in flat_config.iteritems():
+  for path, value in flat_config.items():
     path = path.split(".")
     final_key = path.pop()
     nested_config = config

@@ -41,7 +41,7 @@ class ConfigDict(dict):
         parameters.
     """
     if initial_dictionary:
-      for field, value in initial_dictionary.iteritems():
+      for field, value in initial_dictionary.items():
         initial_dictionary[field] = _maybe_convert_dict(value)

     super(ConfigDict, self).__init__(initial_dictionary)

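A subtlety in this hunk: the loop writes back into `initial_dictionary` while iterating over its `.items()` view. That is safe here because it only replaces values of existing keys; Python 3 dict views raise `RuntimeError` only when iteration adds or removes keys. A sketch of the distinction:

```python
d = {"a": 1, "b": 2}
for k, v in d.items():
  d[k] = v * 10  # safe: values replaced, key set unchanged

try:
  for k, v in d.items():
    d[k + "_new"] = v  # grows the dict mid-iteration
except RuntimeError as err:
  print(err)  # "dictionary changed size during iteration"
```
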
@@ -69,15 +69,7 @@ def create_input_fn(file_pattern,
         repeat=repeat,
         use_tpu=use_tpu)

-    # We must use an initializable iterator, rather than a one-shot iterator,
-    # because the input pipeline contains a stateful table that requires
-    # initialization. We add the initializer to the TABLE_INITIALIZERS
-    # collection to ensure it is run during initialization.
-    iterator = dataset.make_initializable_iterator()
-    tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
-
-    inputs = iterator.get_next()
-    return inputs, inputs.pop("labels", None)
+    return dataset

   return input_fn

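Returning the `tf.data.Dataset` itself works because recent TF 1.x `tf.estimator.Estimator` versions accept a dataset from `input_fn` and create and initialize the iterator internally, and the default `Scaffold` runs `tf.tables_initializer()`, so the hand-rolled initializable iterator is no longer needed. A self-contained toy sketch of the slimmed-down shape (data values are illustrative):

```python
import tensorflow as tf

def input_fn(params):
  """Toy input_fn: returning the Dataset lets the Estimator create and
  initialize the iterator (and any lookup tables) itself."""
  dataset = tf.data.Dataset.from_tensor_slices(
      {"x": [[1.0], [2.0], [3.0]], "labels": [0, 1, 1]})
  return dataset.batch(params["batch_size"])
```
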
@@ -103,6 +95,14 @@ def create_model_fn(model_class, hparams, use_tpu=False):
     if "batch_size" in params:
       hparams.batch_size = params["batch_size"]

+    # Allow labels to be passed in the features dictionary.
+    if "labels" in features:
+      if labels is not None and labels is not features["labels"]:
+        raise ValueError(
+            "Conflicting labels: features['labels'] = %s, labels = %s" %
+            (features["labels"], labels))
+      labels = features.pop("labels")
+
     model = model_class(features, labels, hparams, mode)
     model.build()

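This goes hand in hand with the `input_fn` change above: with the pipeline now yielding a single features dict, labels ride inside it under a `"labels"` key, and `model_fn` pops them back out, erroring only if a conflicting `labels` argument also arrives. A toy illustration of the guard (values are illustrative):

```python
features = {"global_view": [0.1, 0.2], "labels": [1, 0]}
labels = None  # the Estimator passed no separate labels tensor

if "labels" in features:
  if labels is not None and labels is not features["labels"]:
    raise ValueError("Conflicting labels: %s vs. %s"
                     % (features["labels"], labels))
  labels = features.pop("labels")

assert "labels" not in features and labels == [1, 0]
```
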
@@ -160,7 +160,7 @@ def read_kepler_light_curve(filenames,
   all_flux = []

   for filename in filenames:
-    with fits.open(open(filename, "r")) as hdu_list:
+    with fits.open(open(filename, "rb")) as hdu_list:
       light_curve = hdu_list[light_curve_extension].data
       time = light_curve.TIME
       flux = light_curve.PDCSAP_FLUX

@@ -19,7 +19,6 @@ from __future__ import division
 from __future__ import print_function

 import collections
-import itertools

 import numpy as np
 from six.moves import range  # pylint:disable=redefined-builtin

@@ -72,7 +71,7 @@ def split(all_time, all_flux, gap_width=0.75):
   out_time = []
   out_flux = []
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
     start = 0
     for end in range(1, len(time) + 1):
       # Choose the largest endpoint such that time[start:end] has no gaps.

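`itertools.izip` is Python 2 only; its lazy behavior is exactly what the builtin `zip` does in Python 3. On Python 2 the builtin `zip` materializes a list instead, which is harmless at these sizes; code that needs lazy zipping on both interpreters can import it from `six.moves` (this file already uses `six.moves.range`). A sketch with illustrative data:

```python
from __future__ import print_function
from six.moves import zip  # pylint:disable=redefined-builtin

all_time = [[0.5, 1.0], [2.5, 3.0]]
all_flux = [[1.00, 0.99], [1.01, 1.00]]
for time, flux in zip(all_time, all_flux):  # lazy on both interpreters
  print(len(time), len(flux))
```
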
@@ -117,7 +116,7 @@ def remove_events(all_time, all_flux, events, width_factor=1.0):
   output_time = []
   output_flux = []
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
     mask = np.ones_like(time, dtype=np.bool)
     for event in events:
       transit_dist = np.abs(phase_fold_time(time, event.period, event.t0))

@@ -149,7 +148,7 @@ def interpolate_masked_spline(all_time, all_masked_time, all_masked_spline):
       points linearly interpolated.
   """
   interp_spline = []
-  for time, masked_time, masked_spline in itertools.izip(
+  for time, masked_time, masked_spline in zip(
       all_time, all_masked_time, all_masked_spline):
     if len(masked_time) > 0:  # pylint:disable=g-explicit-length-test
       interp_spline.append(np.interp(time, masked_time, masked_spline))

@@ -4,7 +4,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import itertools
 import warnings

 import numpy as np

@@ -149,7 +148,7 @@ def choose_kepler_spline(all_time,
   spline = []
   spline_mask = []
   bad_bkspace = False  # Indicates that the current bkspace should be skipped.
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
     # Don't fit a spline on less than 4 points.
     if len(time) < 4:
       spline.append(flux)