"git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "2abce87a31b802bb2714271dc095c3b6bc3f336d"
Unverified commit d9266ae6, authored by Chris Shallue and committed by GitHub

Merge pull request #3622 from cshallue/py3

Python3 compatibility for AstroNet
parents f85ab4c8 14b8bf25
```diff
@@ -239,7 +239,7 @@ bazel build astronet/...
 TFRECORD_DIR="${HOME}/astronet/tfrecord"
 
 # Preprocess light curves into sharded TFRecord files using 5 worker processes.
-bazel-bin/tensorflow/data/generate_input_records \
+bazel-bin/astronet/data/generate_input_records \
   --input_tce_csv_file=${TCE_CSV_FILE} \
   --kepler_data_dir=${KEPLER_DATA_DIR} \
   --output_dir=${TFRECORD_DIR} \
```
```diff
@@ -130,7 +130,7 @@ class AstroCNNModel(astro_model.AstroModel):
         self.time_series_hidden_layers
     """
     time_series_hidden_layers = {}
-    for name, time_series in self.time_series_features.iteritems():
+    for name, time_series in self.time_series_features.items():
      time_series_hidden_layers[name] = self._build_cnn_layers(
          inputs=time_series,
          hparams=self.hparams.time_series_hidden[name],
```
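This `iteritems()` to `items()` substitution is the change that recurs throughout the PR: `dict.iteritems()` was removed in Python 3, while `items()` exists in both versions (a list copy on Python 2, a lazy view on Python 3), so it is safe on either. A minimal standalone sketch with a made-up dict:

```python
# Hypothetical feature-length dict; only the iteration idiom matters here.
features = {"global_view": 2001, "local_view": 201}

# Python 2 only: features.iteritems()  -> AttributeError on Python 3.
# Portable form, used throughout this PR:
for name, length in features.items():
    print("%s has length %d" % (name, length))
```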
```diff
@@ -151,7 +151,7 @@ class AstroFCModel(astro_model.AstroModel):
         self.time_series_hidden_layers
     """
     time_series_hidden_layers = {}
-    for name, time_series in self.time_series_features.iteritems():
+    for name, time_series in self.time_series_features.items():
      time_series_hidden_layers[name] = self._build_local_fc_layers(
          inputs=time_series,
          hparams=self.hparams.time_series_hidden[name],
```
```diff
@@ -140,7 +140,8 @@ def _set_float_feature(ex, name, value):
 def _set_bytes_feature(ex, name, value):
   """Sets the value of a bytes feature in a tensorflow.train.Example proto."""
   assert name not in ex.features.feature, "Duplicate feature: %s" % name
-  ex.features.feature[name].bytes_list.value.extend([str(v) for v in value])
+  ex.features.feature[name].bytes_list.value.extend([
+      str(v).encode("latin-1") for v in value])
 
 
 def _set_int64_feature(ex, name, value):
```
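The new `.encode("latin-1")` is needed because a protobuf `bytes_list` only accepts `bytes`, and on Python 3 `str(v)` produces unicode text; latin-1 maps code points 0-255 one-to-one onto bytes, so any 8-bit content round-trips. A minimal sketch of the same pattern against the TF 1.x `tf.train.Example` proto, with a hypothetical feature name and values:

```python
import tensorflow as tf  # TF 1.x, as used by AstroNet

ex = tf.train.Example()
values = ["PC", "AFP"]  # hypothetical string column values

# On Python 2, str(v) was already a byte string; on Python 3 it must be
# encoded explicitly before a bytes_list will accept it.
ex.features.feature["disposition"].bytes_list.value.extend(
    [str(v).encode("latin-1") for v in values])

print(ex.features.feature["disposition"].bytes_list.value)  # [b'PC', b'AFP']
```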
```diff
@@ -180,14 +181,14 @@ def _process_tce(tce):
   _set_float_feature(ex, "local_view", local_view)
 
   # Set other columns.
-  for col_name, value in tce.iteritems():
+  for col_name, value in tce.items():
     if np.issubdtype(type(value), np.integer):
       _set_int64_feature(ex, col_name, [value])
    else:
      try:
        _set_float_feature(ex, col_name, [float(value)])
      except ValueError:
-        _set_bytes_feature(ex, col_name, [str(value)])
+        _set_bytes_feature(ex, col_name, [value])
 
   return ex
```
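The second change in this hunk passes the raw `value` instead of `str(value)`, since `_set_bytes_feature` now stringifies and encodes internally, avoiding a double conversion. The surrounding code is a three-way type dispatch; roughly (a paraphrase, not the original code):

```python
import numpy as np

def feature_kind(value):
  """Mirrors the dispatch in _process_tce: int64, else float, else bytes."""
  if np.issubdtype(type(value), np.integer):
    return "int64"
  try:
    float(value)  # real floats and numeric strings both succeed here
    return "float"
  except ValueError:
    return "bytes"  # non-numeric strings fall through to bytes

print(feature_kind(7), feature_kind(3.5), feature_kind("2.5"), feature_kind("PC"))
# -> int64 float float bytes
```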
```diff
@@ -60,7 +60,7 @@ def _recursive_pad_to_batch_size(tensor_or_collection, batch_size):
   if isinstance(tensor_or_collection, dict):
     return {
         name: _recursive_pad_to_batch_size(t, batch_size)
-        for name, t in tensor_or_collection.iteritems()
+        for name, t in tensor_or_collection.items()
     }
 
   if isinstance(tensor_or_collection, collections.Iterable):
```
```diff
@@ -185,8 +185,8 @@ def build_dataset(file_pattern,
   # Create a HashTable mapping label strings to integer ids.
   table_initializer = tf.contrib.lookup.KeyValueTensorInitializer(
-      keys=input_config.label_map.keys(),
-      values=input_config.label_map.values(),
+      keys=list(input_config.label_map.keys()),
+      values=list(input_config.label_map.values()),
      key_dtype=tf.string,
      value_dtype=tf.int32)
   label_to_id = tf.contrib.lookup.HashTable(
```
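The `list(...)` wrappers are needed because Python 3's `dict.keys()` and `dict.values()` return view objects, which this TF 1.x initializer does not handle where it expects list-like values. Pairing `.keys()` with `.values()` is safe here because both come from the same unmutated dict, so they iterate in the same order. A tiny sketch with a hypothetical label map:

```python
label_map = {"PC": 1, "AFP": 0, "NTP": 0}  # hypothetical label map

keys = list(label_map.keys())      # materialize the Python 3 view objects
values = list(label_map.values())  # same dict, not mutated in between,
                                   # so keys[i] pairs with values[i]

print(list(zip(keys, values)))  # [('PC', 1), ('AFP', 0), ('NTP', 0)]
```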
```diff
@@ -197,7 +197,7 @@ def build_dataset(file_pattern,
   # Set specifications for parsing the features.
   data_fields = {
       feature_name: tf.FixedLenFeature([feature.length], tf.float32)
-      for feature_name, feature in input_config.features.iteritems()
+      for feature_name, feature in input_config.features.items()
   }
   if include_labels:
     data_fields[input_config.label_feature] = tf.FixedLenFeature([],
```
```diff
@@ -217,7 +217,7 @@ def build_dataset(file_pattern,
 
     # Reorganize outputs.
     output = {}
-    for feature_name, value in parsed_features.iteritems():
+    for feature_name, value in parsed_features.items():
      if include_labels and feature_name == input_config.label_feature:
        label_id = label_to_id.lookup(value)
        # Ensure that the label_id is nonnegative to verify a successful hash
```
```diff
@@ -37,9 +37,9 @@ def prepare_feed_dict(model, features, labels=None, is_training=None):
     feed_dict: A dictionary of input Tensor to numpy array.
   """
   feed_dict = {}
-  for feature, tensor in model.time_series_features.iteritems():
+  for feature, tensor in model.time_series_features.items():
     feed_dict[tensor] = features["time_series_features"][feature]
-  for feature, tensor in model.aux_features.iteritems():
+  for feature, tensor in model.aux_features.items():
     feed_dict[tensor] = features["aux_features"][feature]
 
   if labels is not None:
```
```diff
@@ -65,7 +65,7 @@ def build_feature_placeholders(config):
   """
   batch_size = None  # Batch size will be dynamically specified.
   features = {"time_series_features": {}, "aux_features": {}}
-  for feature_name, feature_spec in config.iteritems():
+  for feature_name, feature_spec in config.items():
    placeholder = tf.placeholder(
        dtype=tf.float32,
        shape=[batch_size, feature_spec.length],
```
```diff
@@ -39,7 +39,7 @@ class InputOpsTest(tf.test.TestCase):
     for feature_type in features:
       actual_shapes[feature_type] = {
           feature: tensor.shape.as_list()
-          for feature, tensor in features[feature_type].iteritems()
+          for feature, tensor in features[feature_type].items()
       }
 
     self.assertDictEqual(expected_shapes, actual_shapes)
```
```diff
@@ -50,11 +50,11 @@ def fake_features(feature_spec, batch_size):
   features = {}
   features["time_series_features"] = {
       name: np.random.random([batch_size, spec["length"]])
-      for name, spec in feature_spec.iteritems() if spec["is_time_series"]
+      for name, spec in feature_spec.items() if spec["is_time_series"]
   }
   features["aux_features"] = {
       name: np.random.random([batch_size, spec["length"]])
-      for name, spec in feature_spec.iteritems() if not spec["is_time_series"]
+      for name, spec in feature_spec.items() if not spec["is_time_series"]
   }
 
   return features
```
```diff
@@ -110,7 +110,7 @@ def unflatten(flat_config):
     A dictionary nested according to the keys of the input dictionary.
   """
   config = {}
-  for path, value in flat_config.iteritems():
+  for path, value in flat_config.items():
    path = path.split(".")
    final_key = path.pop()
    nested_config = config
```
```diff
@@ -41,7 +41,7 @@ class ConfigDict(dict):
         parameters.
     """
     if initial_dictionary:
-      for field, value in initial_dictionary.iteritems():
+      for field, value in initial_dictionary.items():
        initial_dictionary[field] = _maybe_convert_dict(value)
 
     super(ConfigDict, self).__init__(initial_dictionary)
```
```diff
@@ -69,15 +69,7 @@ def create_input_fn(file_pattern,
         repeat=repeat,
         use_tpu=use_tpu)
 
-    # We must use an initializable iterator, rather than a one-shot iterator,
-    # because the input pipeline contains a stateful table that requires
-    # initialization. We add the initializer to the TABLE_INITIALIZERS
-    # collection to ensure it is run during initialization.
-    iterator = dataset.make_initializable_iterator()
-    tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
-    inputs = iterator.get_next()
-    return inputs, inputs.pop("labels", None)
+    return dataset
 
   return input_fn
```
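Dropping the hand-built iterator works because a TF 1.x `tf.estimator.Estimator` also accepts a `tf.data.Dataset` returned directly from `input_fn`; the Estimator then creates the iterator itself and runs its initializer, covering the lookup-table initialization the removed comment worried about. A minimal sketch of that contract, with made-up data:

```python
import numpy as np
import tensorflow as tf  # TF 1.x API, matching the code under review

def input_fn():
  # Return the Dataset itself; the Estimator builds and initializes
  # the iterator (and any table initializers) internally.
  features = {"x": np.arange(10, dtype=np.float32)}
  labels = np.arange(10, dtype=np.int32)
  return tf.data.Dataset.from_tensor_slices((features, labels)).repeat().batch(8)
```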
```diff
@@ -103,6 +95,14 @@ def create_model_fn(model_class, hparams, use_tpu=False):
     if "batch_size" in params:
       hparams.batch_size = params["batch_size"]
 
+    # Allow labels to be passed in the features dictionary.
+    if "labels" in features:
+      if labels is not None and labels is not features["labels"]:
+        raise ValueError(
+            "Conflicting labels: features['labels'] = %s, labels = %s" %
+            (features["labels"], labels))
+      labels = features.pop("labels")
+
     model = model_class(features, labels, hparams, mode)
     model.build()
```
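This guard exists because an `input_fn` that returns a `Dataset` of one combined dict can carry labels under a `"labels"` key instead of as a separate tensor; the block accepts either convention and rejects contradictory inputs. The same logic as a standalone sketch (`resolve_labels` is a hypothetical helper, not part of the PR):

```python
def resolve_labels(features, labels):
  """Accept labels either as an argument or under features['labels']."""
  if "labels" in features:
    if labels is not None and labels is not features["labels"]:
      raise ValueError("Conflicting labels: features['labels'] = %s, "
                       "labels = %s" % (features["labels"], labels))
    labels = features.pop("labels")
  return features, labels

features, labels = resolve_labels({"x": [1.0], "labels": [0]}, None)
print(features, labels)  # {'x': [1.0]} [0]
```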
```diff
@@ -160,7 +160,7 @@ def read_kepler_light_curve(filenames,
   all_flux = []
 
   for filename in filenames:
-    with fits.open(open(filename, "r")) as hdu_list:
+    with fits.open(open(filename, "rb")) as hdu_list:
      light_curve = hdu_list[light_curve_extension].data
      time = light_curve.TIME
      flux = light_curve.PDCSAP_FLUX
```
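FITS is a binary format: on Python 3, a file opened in text mode ("r") decodes bytes to `str` and fails on non-UTF-8 content, so the inner `open` must use "rb". A sketch assuming astropy is available; the filename and extension name are hypothetical:

```python
from astropy.io import fits

# Text mode would attempt unicode decoding on Python 3 and fail on binary
# FITS data; "rb" hands astropy the raw bytes it expects.
with open("kplr011442793-2009350155506_llc.fits", "rb") as f:  # hypothetical
  with fits.open(f) as hdu_list:
    data = hdu_list["LIGHTCURVE"].data
    print(data.TIME[:5], data.PDCSAP_FLUX[:5])
```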
```diff
@@ -19,7 +19,6 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
-import itertools
 
 import numpy as np
 from six.moves import range  # pylint:disable=redefined-builtin
@@ -72,7 +71,7 @@ def split(all_time, all_flux, gap_width=0.75):
   out_time = []
   out_flux = []
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
     start = 0
     for end in range(1, len(time) + 1):
       # Choose the largest endpoint such that time[start:end] has no gaps.
@@ -117,7 +116,7 @@ def remove_events(all_time, all_flux, events, width_factor=1.0):
   output_time = []
   output_flux = []
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
     mask = np.ones_like(time, dtype=np.bool)
     for event in events:
       transit_dist = np.abs(phase_fold_time(time, event.period, event.t0))
@@ -149,7 +148,7 @@ def interpolate_masked_spline(all_time, all_masked_time, all_masked_spline):
       points linearly interpolated.
   """
   interp_spline = []
-  for time, masked_time, masked_spline in itertools.izip(
+  for time, masked_time, masked_spline in zip(
       all_time, all_masked_time, all_masked_spline):
    if len(masked_time) > 0:  # pylint:disable=g-explicit-length-test
      interp_spline.append(np.interp(time, masked_time, masked_spline))
```
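`itertools.izip` no longer exists on Python 3, where the builtin `zip` is already lazy; on Python 2 this swap builds a list of tuples instead, which is harmless for these short lists of arrays. A tiny sketch, plus the strictly-lazy portable alternative via six (which this file already uses for `range`):

```python
import numpy as np

all_time = [np.arange(3), np.arange(4)]
all_flux = [np.ones(3), np.ones(4)]

# Builtin zip: a lazy iterator on Python 3, a list of tuples on Python 2.
for time, flux in zip(all_time, all_flux):
    print(len(time), len(flux))

# Lazy on both versions, if Python 2 memory use mattered here:
# from six.moves import zip  # pylint:disable=redefined-builtin
```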
```diff
@@ -4,7 +4,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import itertools
 import warnings
 
 import numpy as np
@@ -149,7 +148,7 @@ def choose_kepler_spline(all_time,
   spline = []
   spline_mask = []
   bad_bkspace = False  # Indicates that the current bkspace should be skipped.
-  for time, flux in itertools.izip(all_time, all_flux):
+  for time, flux in zip(all_time, all_flux):
    # Don't fit a spline on less than 4 points.
    if len(time) < 4:
      spline.append(flux)
```