"git@developer.sourcefind.cn:Wenxuan/LightX2V.git" did not exist on "60f0b6c15b04f193ee31a2a1be5b80d2ac0d398c"
Unverified Commit 2c181308 authored by Chris Shallue's avatar Chris Shallue Committed by GitHub
Browse files

Merge pull request #5862 from cshallue/master

Move tensorflow_models/research/astronet to google-research/exoplanet-ml
parents caafb6d1 62704f06
# Package-wide defaults: every target declared below is visible to all other
# packages, and the code is distributed under a "notice"-type license.
package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # Apache 2.0
# Library target for base.py. Its deps are the dataset-ops and ConfigDict
# targets from the astronet and tf_util packages.
py_library(
name = "base",
srcs = [
"base.py",
],
deps = [
"//astronet/ops:dataset_ops",
"//tf_util:configdict",
],
)
# Test target for base_test.py. The TFRecord file under test_data/ is declared
# as a data dependency so it is available to the test at run time.
py_test(
name = "base_test",
srcs = ["base_test.py"],
data = ["test_data/test-dataset.tfrecord"],
srcs_version = "PY2AND3",
deps = [":base"],
)
# Library target for kepler_light_curves.py; depends on :base from this
# package and on the ConfigDict target.
py_library(
name = "kepler_light_curves",
srcs = [
"kepler_light_curves.py",
],
deps = [
":base",
"//tf_util:configdict",
],
)
# Library target for synthetic_transits.py; depends on :base and
# :synthetic_transit_maker from this package and on the ConfigDict target.
py_library(
name = "synthetic_transits",
srcs = [
"synthetic_transits.py",
],
deps = [
":base",
":synthetic_transit_maker",
"//tf_util:configdict",
],
)
# Library target for synthetic_transit_maker.py. No deps are declared here.
py_library(
name = "synthetic_transit_maker",
srcs = [
"synthetic_transit_maker.py",
],
)
# Test target for synthetic_transit_maker_test.py; depends only on the
# :synthetic_transit_maker library under test.
py_test(
name = "synthetic_transit_maker_test",
srcs = ["synthetic_transit_maker_test.py"],
srcs_version = "PY2AND3",
deps = [":synthetic_transit_maker"],
)
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base dataset builder classes for AstroWaveNet input pipelines."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import abc
import six
import tensorflow as tf
from tf_util import configdict
from astronet.ops import dataset_ops
@six.add_metaclass(abc.ABCMeta)
class DatasetBuilder(object):
  """Abstract interface for AstroWaveNet dataset input pipeline builders.

  The builder's configuration is the result of default_config() with any
  caller-supplied overrides applied on top; it is exposed as self.config.
  """

  def __init__(self, config_overrides=None):
    """Creates the builder and resolves its configuration.

    Args:
      config_overrides: Optional dict or ConfigDict whose entries replace or
        extend the values returned by default_config().
    """
    resolved = configdict.ConfigDict(self.default_config())
    if config_overrides is not None:
      resolved.update(config_overrides)
    self.config = resolved

  @staticmethod
  def default_config():
    """Returns the default configuration as a ConfigDict or Python dict.

    The base implementation has no options, so the result is empty.
    """
    return dict()

  @abc.abstractmethod
  def build(self, batch_size):
    """Constructs the dataset input pipeline; subclasses must override.

    Args:
      batch_size: The number of input examples in each batch.

    Returns:
      A tf.data.Dataset object.
    """
    raise NotImplementedError
@six.add_metaclass(abc.ABCMeta)
class _ShardedDatasetBuilder(DatasetBuilder):
  """Abstract base class for a dataset consisting of sharded files.

  Subclasses provide file_reader() and create_example_parser(); build() wires
  them into a pipeline that reads, parses, validates, pads and batches the
  examples.
  """

  def __init__(self, file_pattern, mode, config_overrides=None, use_tpu=False):
    """Initializes the dataset builder.

    Args:
      file_pattern: File pattern matching input file shards, e.g.
        "/tmp/train-?????-of-00100". May also be a comma-separated list of file
        patterns.
      mode: A tf.estimator.ModeKeys.
      config_overrides: Dict or ConfigDict containing overrides to the default
        configuration.
      use_tpu: Whether to build the dataset for TPU.
    """
    super(_ShardedDatasetBuilder, self).__init__(config_overrides)
    self.file_pattern = file_pattern
    self.mode = mode
    self.use_tpu = use_tpu

  @staticmethod
  def default_config():
    """Returns the parent's default config extended with pipeline options."""
    config = super(_ShardedDatasetBuilder,
                   _ShardedDatasetBuilder).default_config()
    config.update({
        # Sequences are clipped to this length; on TPU they are also padded up
        # to it. A falsy value disables clipping (not allowed on TPU).
        "max_length": 1024,
        # Buffer size for shuffling examples in TRAIN mode; <= 0 disables it.
        "shuffle_values_buffer": 1000,
        # Passed as num_parallel_calls when mapping the example parser.
        "num_parallel_parser_calls": 4,
        "batches_buffer_size": None,  # Defaults to max(1, 256 / batch_size).
    })
    return config

  @abc.abstractmethod
  def file_reader(self):
    """Returns a function that reads a single sharded file."""
    raise NotImplementedError

  @abc.abstractmethod
  def create_example_parser(self):
    """Returns a function that parses a single tf.Example proto."""
    raise NotImplementedError

  def _batch_and_pad(self, dataset, batch_size):
    """Combines elements into batches of the same length, padding if needed.

    Args:
      dataset: A tf.data.Dataset whose elements are dicts of 2D sequences.
      batch_size: The number of input examples in each batch.

    Returns:
      The padded and batched tf.data.Dataset.

    Raises:
      ValueError: If use_tpu is set but config.max_length is not.
    """
    if self.use_tpu:
      padded_length = self.config.max_length
      if not padded_length:
        raise ValueError("config.max_length is required when using TPU")

      # Pad with zeros up to padded_length. Note that this will pad the
      # "weights" Tensor with zeros as well, which ensures that padded elements
      # do not contribute to the loss.
      padded_shapes = {}
      # items() rather than iteritems(): the latter does not exist on Python 3
      # dicts, and this code is meant to run on both Python 2 and 3.
      for name, shape in dataset.output_shapes.items():
        shape.assert_is_compatible_with([None, None])  # Expect a 2D sequence.
        dims = shape.as_list()
        dims[0] = padded_length
        shape = tf.TensorShape(dims)
        shape.assert_is_fully_defined()
        padded_shapes[name] = shape
    else:
      # Pad each batch up to the maximum size of each dimension in the batch.
      padded_shapes = dataset.output_shapes

    return dataset.padded_batch(batch_size, padded_shapes)

  def build(self, batch_size):
    """Builds the dataset input pipeline.

    Args:
      batch_size: The number of input examples in each batch.

    Returns:
      A tf.data.Dataset.

    Raises:
      ValueError: If no files match self.file_pattern.
    """
    file_patterns = self.file_pattern.split(",")
    filenames = []
    for p in file_patterns:
      matches = tf.gfile.Glob(p)
      if not matches:
        raise ValueError("Found no input files matching {}".format(p))
      filenames.extend(matches)
    tf.logging.info(
        "Building input pipeline from %d files matching patterns: %s",
        len(filenames), file_patterns)

    is_training = self.mode == tf.estimator.ModeKeys.TRAIN

    # Create a string dataset of filenames, and possibly shuffle.
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if is_training and len(filenames) > 1:
      filename_dataset = filename_dataset.shuffle(len(filenames))

    # Read serialized Example protos.
    dataset = filename_dataset.apply(
        tf.contrib.data.parallel_interleave(
            self.file_reader(), cycle_length=8, block_length=8, sloppy=True))

    if is_training:
      # Shuffle and repeat. Note that shuffle() is before repeat(), so elements
      # are shuffled among each epoch of data, and not between epochs of data.
      if self.config.shuffle_values_buffer > 0:
        dataset = dataset.shuffle(self.config.shuffle_values_buffer)
      dataset = dataset.repeat()

    # Map the parser over the dataset.
    dataset = dataset.map(
        self.create_example_parser(),
        num_parallel_calls=self.config.num_parallel_parser_calls)

    def _prepare_wavenet_inputs(features):
      """Validates features, and clips lengths and adds weights if needed."""
      # Validate feature names.
      required_features = {"autoregressive_input", "conditioning_stack"}
      allowed_features = required_features | {"weights"}
      feature_names = features.keys()
      if not required_features.issubset(feature_names):
        raise ValueError("Features must contain all of: {}. Got: {}".format(
            required_features, feature_names))
      if not allowed_features.issuperset(feature_names):
        raise ValueError("Features can only contain: {}. Got: {}".format(
            allowed_features, feature_names))

      output = {}
      for name, value in features.items():
        # Validate shapes. The output dimension is [num_samples, dim].
        ndims = len(value.shape)
        if ndims == 1:
          # Add an extra dimension: [num_samples] -> [num_samples, 1].
          value = tf.expand_dims(value, -1)
        elif ndims != 2:
          raise ValueError(
              "Features should be 1D or 2D sequences. Got '{}' = {}".format(
                  name, value))
        if self.config.max_length:
          value = value[:self.config.max_length]
        output[name] = value

      # Without explicit weights, every (unpadded) element is weighted 1.
      if "weights" not in output:
        output["weights"] = tf.ones_like(output["autoregressive_input"])

      return output

    dataset = dataset.map(_prepare_wavenet_inputs)

    # Batch results by up to batch_size.
    dataset = self._batch_and_pad(dataset, batch_size)

    if is_training:
      # The dataset repeats infinitely before batching, so each batch has the
      # maximum number of elements.
      dataset = dataset_ops.set_batch_size(dataset, batch_size)
    elif self.use_tpu and self.mode == tf.estimator.ModeKeys.EVAL:
      # Pad to ensure that each batch has the same number of elements.
      dataset = dataset_ops.pad_dataset_to_batch_size(dataset, batch_size)

    # Prefetch batches.
    buffer_size = (
        self.config.batches_buffer_size or max(1, int(256 / batch_size)))
    dataset = dataset.prefetch(buffer_size)

    return dataset
def tfrecord_reader(filename):
  """Opens one TFRecord file shard as a tf.data.Dataset.

  Args:
    filename: Name of the TFRecord file to read.

  Returns:
    A tf.data.TFRecordDataset over the file.
  """
  read_buffer_size = 16 * 1000 * 1000
  return tf.data.TFRecordDataset(filename, buffer_size=read_buffer_size)
class TFRecordDataset(_ShardedDatasetBuilder):
  """Sharded dataset builder whose shards are TFRecord files."""

  def file_reader(self):
    """Returns the function used to read one file shard (tfrecord_reader)."""
    shard_reader = tfrecord_reader
    return shard_reader
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for base.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
from absl import flags
import numpy as np
import tensorflow as tf
from astrowavenet.data import base
# Handle to the absl flag values used by the tests below.
FLAGS = flags.FLAGS
# Directory that _TEST_TFRECORD_FILE is joined onto to locate the test data.
flags.DEFINE_string("test_srcdir", "", "Test source directory.")
# Path of the test TFRecord file, relative to the test_srcdir flag.
_TEST_TFRECORD_FILE = "astrowavenet/data/test_data/test-dataset.tfrecord"
class TFRecordDataset(base.TFRecordDataset):
  """Test-only TFRecordDataset that can parse the records in the test file."""

  @staticmethod
  def default_config():
    """Returns the parent's defaults plus the options used by these tests."""
    test_options = {
        "shuffle_values_buffer": 0,  # Ensure deterministic output.
        "input_dim": 1,
        "conditioning_dim": 1,
        "include_weights": False,
    }
    config = super(TFRecordDataset, TFRecordDataset).default_config()
    config.update(test_options)
    return config

  def create_example_parser(self):
    """Returns a function that parses a single tf.Example proto."""

    def _example_parser(serialized_example):
      """Parses a single tf.Example into a dict of feature Tensors."""
      float_feature_names = (
          "feature_1", "feature_2", "feature_3", "feature_4", "weights")
      features = tf.parse_single_example(
          serialized_example,
          features={
              name: tf.VarLenFeature(tf.float32)
              for name in float_feature_names
          })

      def _gather(names):
        """Returns one feature as-is, or several stacked on a new last axis."""
        columns = [features[name].values for name in names]
        if len(columns) == 1:
          return columns[0]  # Shape = [num_samples].
        return tf.stack(columns, axis=-1)  # Shape = [num_samples, 2].

      output = {}

      input_dim = self.config.input_dim
      if input_dim == 1:
        input_names = ["feature_1"]
      elif input_dim == 2:
        input_names = ["feature_1", "feature_2"]
      else:
        raise ValueError("Unexpected input_dim: {}".format(input_dim))
      output["autoregressive_input"] = _gather(input_names)

      conditioning_dim = self.config.conditioning_dim
      if conditioning_dim == 1:
        conditioning_names = ["feature_3"]
      elif conditioning_dim == 2:
        conditioning_names = ["feature_3", "feature_4"]
      else:
        raise ValueError(
            "Unexpected conditioning_dim: {}".format(conditioning_dim))
      output["conditioning_stack"] = _gather(conditioning_names)

      # Weights are only passed through when the config asks for them.
      if self.config.include_weights:
        output["weights"] = features["weights"].values

      return output

    return _example_parser
class TFRecordDatasetTest(tf.test.TestCase):
def setUp(self):
  """Resolves the location of the test TFRecord file."""
  super(TFRecordDatasetTest, self).setUp()

  # The test dataset contains 8 tensorflow.Example protocol buffers. The i-th
  # Example contains the following features:
  #   feature_1 = range(10, 10 + i + 1)
  #   feature_2 = range(20, 20 + i + 1)
  #   feature_3 = range(30, 30 + i + 1)
  #   feature_4 = range(40, 40 + i + 1)
  #   weights = [0] * i + [1]
  source_root = FLAGS.test_srcdir
  self._file_pattern = os.path.join(source_root, _TEST_TFRECORD_FILE)
def testTrainMode(self):
  """Checks TRAIN-mode batches built with the default test configuration.

  The expected values show examples being batched 5 at a time, zero-padded to
  the longest sequence in each batch, with the dataset wrapping around to the
  first example again. Weights are not read from the file here, so they are 1
  for real elements and 0 for padding.
  """
  builder = TFRecordDataset(self._file_pattern, tf.estimator.ModeKeys.TRAIN)
  next_features = builder.build(5).make_one_shot_iterator().get_next()
  self.assertItemsEqual(
      ["autoregressive_input", "conditioning_stack", "weights"],
      next_features.keys())

  # Features have dynamic length but fixed batch size and input dimension.
  next_features["autoregressive_input"].shape.assert_is_compatible_with(
      [5, None, 1])
  next_features["conditioning_stack"].shape.assert_is_compatible_with(
      [5, None, 1])
  # Layout is [batch, length, dim]; only the length axis is dynamic.
  next_features["weights"].shape.assert_is_compatible_with([5, None, 1])

  # Dataset repeats indefinitely.
  with self.test_session() as sess:
    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [0], [0], [0], [0]],
        [[10], [11], [0], [0], [0]],
        [[10], [11], [12], [0], [0]],
        [[10], [11], [12], [13], [0]],
        [[10], [11], [12], [13], [14]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [0], [0], [0], [0]],
        [[30], [31], [0], [0], [0]],
        [[30], [31], [32], [0], [0]],
        [[30], [31], [32], [33], [0]],
        [[30], [31], [32], [33], [34]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [0], [0], [0], [0]],
        [[1], [1], [0], [0], [0]],
        [[1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1]],
    ], features["weights"])

    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [11], [12], [13], [14], [15], [0], [0]],
        [[10], [11], [12], [13], [14], [15], [16], [0]],
        [[10], [11], [12], [13], [14], [15], [16], [17]],
        [[10], [0], [0], [0], [0], [0], [0], [0]],
        [[10], [11], [0], [0], [0], [0], [0], [0]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [31], [32], [33], [34], [35], [0], [0]],
        [[30], [31], [32], [33], [34], [35], [36], [0]],
        [[30], [31], [32], [33], [34], [35], [36], [37]],
        [[30], [0], [0], [0], [0], [0], [0], [0]],
        [[30], [31], [0], [0], [0], [0], [0], [0]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [1], [1], [1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1], [1], [1], [1]],
        [[1], [0], [0], [0], [0], [0], [0], [0]],
        [[1], [1], [0], [0], [0], [0], [0], [0]],
    ], features["weights"])

    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [11], [12], [0], [0], [0], [0]],
        [[10], [11], [12], [13], [0], [0], [0]],
        [[10], [11], [12], [13], [14], [0], [0]],
        [[10], [11], [12], [13], [14], [15], [0]],
        [[10], [11], [12], [13], [14], [15], [16]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [31], [32], [0], [0], [0], [0]],
        [[30], [31], [32], [33], [0], [0], [0]],
        [[30], [31], [32], [33], [34], [0], [0]],
        [[30], [31], [32], [33], [34], [35], [0]],
        [[30], [31], [32], [33], [34], [35], [36]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [1], [1], [0], [0], [0], [0]],
        [[1], [1], [1], [1], [0], [0], [0]],
        [[1], [1], [1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1], [1], [1]],
    ], features["weights"])
def testTrainModeReadWeights(self):
  """Checks TRAIN-mode batches when weights are read from the input file."""
  dataset = TFRecordDataset(
      self._file_pattern,
      tf.estimator.ModeKeys.TRAIN,
      config_overrides={"include_weights": True}).build(5)
  batch = dataset.make_one_shot_iterator().get_next()
  self.assertItemsEqual(
      ["autoregressive_input", "conditioning_stack", "weights"],
      batch.keys())

  # Features have dynamic length but fixed batch size and input dimension.
  for feature_name in ("autoregressive_input", "conditioning_stack",
                       "weights"):
    batch[feature_name].shape.assert_is_compatible_with([5, None, 1])

  # One (autoregressive_input, conditioning_stack, weights) triple per batch,
  # in the order the batches are expected to be produced.
  expected_batches = [
      (
          [
              [[10], [0], [0], [0], [0]],
              [[10], [11], [0], [0], [0]],
              [[10], [11], [12], [0], [0]],
              [[10], [11], [12], [13], [0]],
              [[10], [11], [12], [13], [14]],
          ],
          [
              [[30], [0], [0], [0], [0]],
              [[30], [31], [0], [0], [0]],
              [[30], [31], [32], [0], [0]],
              [[30], [31], [32], [33], [0]],
              [[30], [31], [32], [33], [34]],
          ],
          [
              [[1], [0], [0], [0], [0]],
              [[0], [1], [0], [0], [0]],
              [[0], [0], [1], [0], [0]],
              [[0], [0], [0], [1], [0]],
              [[0], [0], [0], [0], [1]],
          ],
      ),
      (
          [
              [[10], [11], [12], [13], [14], [15], [0], [0]],
              [[10], [11], [12], [13], [14], [15], [16], [0]],
              [[10], [11], [12], [13], [14], [15], [16], [17]],
              [[10], [0], [0], [0], [0], [0], [0], [0]],
              [[10], [11], [0], [0], [0], [0], [0], [0]],
          ],
          [
              [[30], [31], [32], [33], [34], [35], [0], [0]],
              [[30], [31], [32], [33], [34], [35], [36], [0]],
              [[30], [31], [32], [33], [34], [35], [36], [37]],
              [[30], [0], [0], [0], [0], [0], [0], [0]],
              [[30], [31], [0], [0], [0], [0], [0], [0]],
          ],
          [
              [[0], [0], [0], [0], [0], [1], [0], [0]],
              [[0], [0], [0], [0], [0], [0], [1], [0]],
              [[0], [0], [0], [0], [0], [0], [0], [1]],
              [[1], [0], [0], [0], [0], [0], [0], [0]],
              [[0], [1], [0], [0], [0], [0], [0], [0]],
          ],
      ),
      (
          [
              [[10], [11], [12], [0], [0], [0], [0]],
              [[10], [11], [12], [13], [0], [0], [0]],
              [[10], [11], [12], [13], [14], [0], [0]],
              [[10], [11], [12], [13], [14], [15], [0]],
              [[10], [11], [12], [13], [14], [15], [16]],
          ],
          [
              [[30], [31], [32], [0], [0], [0], [0]],
              [[30], [31], [32], [33], [0], [0], [0]],
              [[30], [31], [32], [33], [34], [0], [0]],
              [[30], [31], [32], [33], [34], [35], [0]],
              [[30], [31], [32], [33], [34], [35], [36]],
          ],
          [
              [[0], [0], [1], [0], [0], [0], [0]],
              [[0], [0], [0], [1], [0], [0], [0]],
              [[0], [0], [0], [0], [1], [0], [0]],
              [[0], [0], [0], [0], [0], [1], [0]],
              [[0], [0], [0], [0], [0], [0], [1]],
          ],
      ),
  ]

  # Dataset repeats indefinitely.
  with self.test_session() as sess:
    for expected_input, expected_conditioning, expected_weights in (
        expected_batches):
      actual = sess.run(batch)
      np.testing.assert_almost_equal(expected_input,
                                     actual["autoregressive_input"])
      np.testing.assert_almost_equal(expected_conditioning,
                                     actual["conditioning_stack"])
      np.testing.assert_almost_equal(expected_weights, actual["weights"])
def testTrainMode2DInput(self):
  """Checks TRAIN-mode batches when the autoregressive input has 2 dims.

  Weights are not read from the file here, so they take the shape of the
  autoregressive input: 1 for real elements and 0 for padding.
  """
  config_overrides = {"input_dim": 2}
  builder = TFRecordDataset(
      self._file_pattern,
      tf.estimator.ModeKeys.TRAIN,
      config_overrides=config_overrides)
  next_features = builder.build(5).make_one_shot_iterator().get_next()
  self.assertItemsEqual(
      ["autoregressive_input", "conditioning_stack", "weights"],
      next_features.keys())

  # Features have dynamic length but fixed batch size and input dimension.
  next_features["autoregressive_input"].shape.assert_is_compatible_with(
      [5, None, 2])
  next_features["conditioning_stack"].shape.assert_is_compatible_with(
      [5, None, 1])
  # Layout is [batch, length, dim]; the weights mirror the 2D input.
  next_features["weights"].shape.assert_is_compatible_with([5, None, 2])

  # Dataset repeats indefinitely.
  with self.test_session() as sess:
    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10, 20], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[10, 20], [11, 21], [0, 0], [0, 0], [0, 0]],
        [[10, 20], [11, 21], [12, 22], [0, 0], [0, 0]],
        [[10, 20], [11, 21], [12, 22], [13, 23], [0, 0]],
        [[10, 20], [11, 21], [12, 22], [13, 23], [14, 24]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [0], [0], [0], [0]],
        [[30], [31], [0], [0], [0]],
        [[30], [31], [32], [0], [0]],
        [[30], [31], [32], [33], [0]],
        [[30], [31], [32], [33], [34]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1, 1], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[1, 1], [1, 1], [0, 0], [0, 0], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [0, 0], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1]],
    ], features["weights"])

    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10, 20], [11, 21], [12, 22], [13, 23], [14, 24], [15, 25], [0, 0],
         [0, 0]],
        [[10, 20], [11, 21], [12, 22], [13, 23], [14, 24], [15, 25], [16, 26],
         [0, 0]],
        [[10, 20], [11, 21], [12, 22], [13, 23], [14, 24], [15, 25], [16, 26],
         [17, 27]],
        [[10, 20], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[10, 20], [11, 21], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [31], [32], [33], [34], [35], [0], [0]],
        [[30], [31], [32], [33], [34], [35], [36], [0]],
        [[30], [31], [32], [33], [34], [35], [36], [37]],
        [[30], [0], [0], [0], [0], [0], [0], [0]],
        [[30], [31], [0], [0], [0], [0], [0], [0]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [0, 0], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]],
        [[1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
    ], features["weights"])

    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10, 20], [11, 21], [12, 22], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[10, 20], [11, 21], [12, 22], [13, 23], [0, 0], [0, 0], [0, 0]],
        [[10, 20], [11, 21], [12, 22], [13, 23], [14, 24], [0, 0], [0, 0]],
        [[10, 20], [11, 21], [12, 22], [13, 23], [14, 24], [15, 25], [0, 0]],
        [[10, 20], [11, 21], [12, 22], [13, 23], [14, 24], [15, 25], [16, 26]
        ],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [31], [32], [0], [0], [0], [0]],
        [[30], [31], [32], [33], [0], [0], [0]],
        [[30], [31], [32], [33], [34], [0], [0]],
        [[30], [31], [32], [33], [34], [35], [0]],
        [[30], [31], [32], [33], [34], [35], [36]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1, 1], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [0, 0], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [0, 0]],
        [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1]],
    ], features["weights"])
def testTrainMode2DConditioning(self):
  """Checks TRAIN-mode batches when the conditioning stack has 2 dims.

  Weights are not read from the file here, so they take the shape of the
  (1-dimensional) autoregressive input: 1 for real elements, 0 for padding.
  """
  config_overrides = {"conditioning_dim": 2}
  builder = TFRecordDataset(
      self._file_pattern,
      tf.estimator.ModeKeys.TRAIN,
      config_overrides=config_overrides)
  next_features = builder.build(5).make_one_shot_iterator().get_next()
  self.assertItemsEqual(
      ["autoregressive_input", "conditioning_stack", "weights"],
      next_features.keys())

  # Features have dynamic length but fixed batch size and input dimension.
  next_features["autoregressive_input"].shape.assert_is_compatible_with(
      [5, None, 1])
  next_features["conditioning_stack"].shape.assert_is_compatible_with(
      [5, None, 2])
  # Layout is [batch, length, dim]; only the length axis is dynamic.
  next_features["weights"].shape.assert_is_compatible_with([5, None, 1])

  # Dataset repeats indefinitely.
  with self.test_session() as sess:
    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [0], [0], [0], [0]],
        [[10], [11], [0], [0], [0]],
        [[10], [11], [12], [0], [0]],
        [[10], [11], [12], [13], [0]],
        [[10], [11], [12], [13], [14]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30, 40], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[30, 40], [31, 41], [0, 0], [0, 0], [0, 0]],
        [[30, 40], [31, 41], [32, 42], [0, 0], [0, 0]],
        [[30, 40], [31, 41], [32, 42], [33, 43], [0, 0]],
        [[30, 40], [31, 41], [32, 42], [33, 43], [34, 44]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [0], [0], [0], [0]],
        [[1], [1], [0], [0], [0]],
        [[1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1]],
    ], features["weights"])

    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [11], [12], [13], [14], [15], [0], [0]],
        [[10], [11], [12], [13], [14], [15], [16], [0]],
        [[10], [11], [12], [13], [14], [15], [16], [17]],
        [[10], [0], [0], [0], [0], [0], [0], [0]],
        [[10], [11], [0], [0], [0], [0], [0], [0]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30, 40], [31, 41], [32, 42], [33, 43], [34, 44], [35, 45], [0, 0],
         [0, 0]],
        [[30, 40], [31, 41], [32, 42], [33, 43], [34, 44], [35, 45], [36, 46],
         [0, 0]],
        [[30, 40], [31, 41], [32, 42], [33, 43], [34, 44], [35, 45], [36, 46],
         [37, 47]],
        [[30, 40], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[30, 40], [31, 41], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [1], [1], [1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1], [1], [1], [1]],
        [[1], [0], [0], [0], [0], [0], [0], [0]],
        [[1], [1], [0], [0], [0], [0], [0], [0]],
    ], features["weights"])

    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [11], [12], [0], [0], [0], [0]],
        [[10], [11], [12], [13], [0], [0], [0]],
        [[10], [11], [12], [13], [14], [0], [0]],
        [[10], [11], [12], [13], [14], [15], [0]],
        [[10], [11], [12], [13], [14], [15], [16]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30, 40], [31, 41], [32, 42], [0, 0], [0, 0], [0, 0], [0, 0]],
        [[30, 40], [31, 41], [32, 42], [33, 43], [0, 0], [0, 0], [0, 0]],
        [[30, 40], [31, 41], [32, 42], [33, 43], [34, 44], [0, 0], [0, 0]],
        [[30, 40], [31, 41], [32, 42], [33, 43], [34, 44], [35, 45], [0, 0]],
        [[30, 40], [31, 41], [32, 42], [33, 43], [34, 44], [35, 45], [36, 46]
        ],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [1], [1], [0], [0], [0], [0]],
        [[1], [1], [1], [1], [0], [0], [0]],
        [[1], [1], [1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1], [1], [1]],
    ], features["weights"])
def testTrainModeMaxLength(self):
  """Checks that TRAIN-mode sequences are clipped to config.max_length.

  With max_length = 6, the expected values show sequences longer than 6
  truncated to 6 elements, while shorter batches are still only padded up to
  the longest sequence they contain.
  """
  config_overrides = {"max_length": 6}
  builder = TFRecordDataset(
      self._file_pattern,
      tf.estimator.ModeKeys.TRAIN,
      config_overrides=config_overrides)
  next_features = builder.build(5).make_one_shot_iterator().get_next()
  self.assertItemsEqual(
      ["autoregressive_input", "conditioning_stack", "weights"],
      next_features.keys())

  # Features have dynamic length but fixed batch size and input dimension.
  next_features["autoregressive_input"].shape.assert_is_compatible_with(
      [5, None, 1])
  next_features["conditioning_stack"].shape.assert_is_compatible_with(
      [5, None, 1])
  # Layout is [batch, length, dim]; only the length axis is dynamic.
  next_features["weights"].shape.assert_is_compatible_with([5, None, 1])

  # Dataset repeats indefinitely.
  with self.test_session() as sess:
    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [0], [0], [0], [0]],
        [[10], [11], [0], [0], [0]],
        [[10], [11], [12], [0], [0]],
        [[10], [11], [12], [13], [0]],
        [[10], [11], [12], [13], [14]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [0], [0], [0], [0]],
        [[30], [31], [0], [0], [0]],
        [[30], [31], [32], [0], [0]],
        [[30], [31], [32], [33], [0]],
        [[30], [31], [32], [33], [34]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [0], [0], [0], [0]],
        [[1], [1], [0], [0], [0]],
        [[1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1]],
    ], features["weights"])

    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [11], [12], [13], [14], [15]],
        [[10], [11], [12], [13], [14], [15]],
        [[10], [11], [12], [13], [14], [15]],
        [[10], [0], [0], [0], [0], [0]],
        [[10], [11], [0], [0], [0], [0]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [31], [32], [33], [34], [35]],
        [[30], [31], [32], [33], [34], [35]],
        [[30], [31], [32], [33], [34], [35]],
        [[30], [0], [0], [0], [0], [0]],
        [[30], [31], [0], [0], [0], [0]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [1], [1], [1], [1], [1]],
        [[1], [1], [1], [1], [1], [1]],
        [[1], [1], [1], [1], [1], [1]],
        [[1], [0], [0], [0], [0], [0]],
        [[1], [1], [0], [0], [0], [0]],
    ], features["weights"])

    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [11], [12], [0], [0], [0]],
        [[10], [11], [12], [13], [0], [0]],
        [[10], [11], [12], [13], [14], [0]],
        [[10], [11], [12], [13], [14], [15]],
        [[10], [11], [12], [13], [14], [15]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [31], [32], [0], [0], [0]],
        [[30], [31], [32], [33], [0], [0]],
        [[30], [31], [32], [33], [34], [0]],
        [[30], [31], [32], [33], [34], [35]],
        [[30], [31], [32], [33], [34], [35]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [1], [1], [0], [0], [0]],
        [[1], [1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1], [1]],
        [[1], [1], [1], [1], [1], [1]],
    ], features["weights"])
def testTrainModeTPU(self):
  """Checks TRAIN-mode batches built for TPU with max_length = 6.

  Every feature is expected to have the fully defined shape [5, 6, 1]:
  sequences are clipped to 6 elements and zero-padded up to 6.
  """
  dataset = TFRecordDataset(
      self._file_pattern,
      tf.estimator.ModeKeys.TRAIN,
      config_overrides={"max_length": 6},
      use_tpu=True).build(5)
  batch = dataset.make_one_shot_iterator().get_next()
  self.assertItemsEqual(
      ["autoregressive_input", "conditioning_stack", "weights"],
      batch.keys())

  # Features have fixed shape.
  for feature_name in ("autoregressive_input", "conditioning_stack",
                       "weights"):
    self.assertEqual([5, 6, 1], batch[feature_name].shape)

  # One (autoregressive_input, conditioning_stack, weights) triple per batch,
  # in the order the batches are expected to be produced.
  expected_batches = [
      (
          [
              [[10], [0], [0], [0], [0], [0]],
              [[10], [11], [0], [0], [0], [0]],
              [[10], [11], [12], [0], [0], [0]],
              [[10], [11], [12], [13], [0], [0]],
              [[10], [11], [12], [13], [14], [0]],
          ],
          [
              [[30], [0], [0], [0], [0], [0]],
              [[30], [31], [0], [0], [0], [0]],
              [[30], [31], [32], [0], [0], [0]],
              [[30], [31], [32], [33], [0], [0]],
              [[30], [31], [32], [33], [34], [0]],
          ],
          [
              [[1], [0], [0], [0], [0], [0]],
              [[1], [1], [0], [0], [0], [0]],
              [[1], [1], [1], [0], [0], [0]],
              [[1], [1], [1], [1], [0], [0]],
              [[1], [1], [1], [1], [1], [0]],
          ],
      ),
      (
          [
              [[10], [11], [12], [13], [14], [15]],
              [[10], [11], [12], [13], [14], [15]],
              [[10], [11], [12], [13], [14], [15]],
              [[10], [0], [0], [0], [0], [0]],
              [[10], [11], [0], [0], [0], [0]],
          ],
          [
              [[30], [31], [32], [33], [34], [35]],
              [[30], [31], [32], [33], [34], [35]],
              [[30], [31], [32], [33], [34], [35]],
              [[30], [0], [0], [0], [0], [0]],
              [[30], [31], [0], [0], [0], [0]],
          ],
          [
              [[1], [1], [1], [1], [1], [1]],
              [[1], [1], [1], [1], [1], [1]],
              [[1], [1], [1], [1], [1], [1]],
              [[1], [0], [0], [0], [0], [0]],
              [[1], [1], [0], [0], [0], [0]],
          ],
      ),
      (
          [
              [[10], [11], [12], [0], [0], [0]],
              [[10], [11], [12], [13], [0], [0]],
              [[10], [11], [12], [13], [14], [0]],
              [[10], [11], [12], [13], [14], [15]],
              [[10], [11], [12], [13], [14], [15]],
          ],
          [
              [[30], [31], [32], [0], [0], [0]],
              [[30], [31], [32], [33], [0], [0]],
              [[30], [31], [32], [33], [34], [0]],
              [[30], [31], [32], [33], [34], [35]],
              [[30], [31], [32], [33], [34], [35]],
          ],
          [
              [[1], [1], [1], [0], [0], [0]],
              [[1], [1], [1], [1], [0], [0]],
              [[1], [1], [1], [1], [1], [0]],
              [[1], [1], [1], [1], [1], [1]],
              [[1], [1], [1], [1], [1], [1]],
          ],
      ),
  ]

  # Dataset repeats indefinitely.
  with self.test_session() as sess:
    for expected_input, expected_conditioning, expected_weights in (
        expected_batches):
      actual = sess.run(batch)
      np.testing.assert_almost_equal(expected_input,
                                     actual["autoregressive_input"])
      np.testing.assert_almost_equal(expected_conditioning,
                                     actual["conditioning_stack"])
      np.testing.assert_almost_equal(expected_weights, actual["weights"])
def testEvalMode(self):
  """Checks EVAL-mode batches: dynamic padding and a trailing partial batch.

  The first batch holds 5 examples padded to the longest example in the batch
  (length 5). The second batch is partial (3 examples, padded to length 8).
  After that the one-shot iterator is exhausted.
  """
  builder = TFRecordDataset(self._file_pattern, tf.estimator.ModeKeys.EVAL)
  next_features = builder.build(5).make_one_shot_iterator().get_next()
  self.assertItemsEqual(
      ["autoregressive_input", "conditioning_stack", "weights"],
      next_features.keys())

  # Features have dynamic length but fixed batch size and input dimension.
  # All three features share the [batch, length, 1] layout (see the expected
  # values below), so "weights" is checked against the same spec as the
  # others. A transposed spec such as [5, 1, None] would pass vacuously,
  # because unknown dimensions are compatible with any size.
  next_features["autoregressive_input"].shape.assert_is_compatible_with(
      [5, None, 1])
  next_features["conditioning_stack"].shape.assert_is_compatible_with(
      [5, None, 1])
  next_features["weights"].shape.assert_is_compatible_with([5, None, 1])

  with self.test_session() as sess:
    # Full batch.
    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [0], [0], [0], [0]],
        [[10], [11], [0], [0], [0]],
        [[10], [11], [12], [0], [0]],
        [[10], [11], [12], [13], [0]],
        [[10], [11], [12], [13], [14]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [0], [0], [0], [0]],
        [[30], [31], [0], [0], [0]],
        [[30], [31], [32], [0], [0]],
        [[30], [31], [32], [33], [0]],
        [[30], [31], [32], [33], [34]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [0], [0], [0], [0]],
        [[1], [1], [0], [0], [0]],
        [[1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1]],
    ], features["weights"])

    # Partial batch.
    features = sess.run(next_features)
    np.testing.assert_almost_equal([
        [[10], [11], [12], [13], [14], [15], [0], [0]],
        [[10], [11], [12], [13], [14], [15], [16], [0]],
        [[10], [11], [12], [13], [14], [15], [16], [17]],
    ], features["autoregressive_input"])
    np.testing.assert_almost_equal([
        [[30], [31], [32], [33], [34], [35], [0], [0]],
        [[30], [31], [32], [33], [34], [35], [36], [0]],
        [[30], [31], [32], [33], [34], [35], [36], [37]],
    ], features["conditioning_stack"])
    np.testing.assert_almost_equal([
        [[1], [1], [1], [1], [1], [1], [0], [0]],
        [[1], [1], [1], [1], [1], [1], [1], [0]],
        [[1], [1], [1], [1], [1], [1], [1], [1]],
    ], features["weights"])

    # The dataset is not repeated in EVAL mode, so a third fetch must fail.
    with self.assertRaises(tf.errors.OutOfRangeError):
      sess.run(next_features)
def testEvalModeTPU(self):
  """Verifies EVAL batches on TPU have static shapes and a padded last batch."""
  dataset_builder = TFRecordDataset(
      self._file_pattern,
      tf.estimator.ModeKeys.EVAL,
      config_overrides={"max_length": 6},
      use_tpu=True)
  iterator = dataset_builder.build(5).make_one_shot_iterator()
  next_features = iterator.get_next()
  self.assertItemsEqual(
      ["autoregressive_input", "conditioning_stack", "weights"],
      next_features.keys())

  # Features have fixed shape.
  self.assertEqual([5, 6, 1], next_features["autoregressive_input"].shape)
  self.assertEqual([5, 6, 1], next_features["conditioning_stack"].shape)
  self.assertEqual([5, 6, 1], next_features["weights"].shape)

  # Expected contents of the first (full) batch, in comparison order.
  first_batch = [
      ("autoregressive_input", [
          [[10], [0], [0], [0], [0], [0]],
          [[10], [11], [0], [0], [0], [0]],
          [[10], [11], [12], [0], [0], [0]],
          [[10], [11], [12], [13], [0], [0]],
          [[10], [11], [12], [13], [14], [0]],
      ]),
      ("conditioning_stack", [
          [[30], [0], [0], [0], [0], [0]],
          [[30], [31], [0], [0], [0], [0]],
          [[30], [31], [32], [0], [0], [0]],
          [[30], [31], [32], [33], [0], [0]],
          [[30], [31], [32], [33], [34], [0]],
      ]),
      ("weights", [
          [[1], [0], [0], [0], [0], [0]],
          [[1], [1], [0], [0], [0], [0]],
          [[1], [1], [1], [0], [0], [0]],
          [[1], [1], [1], [1], [0], [0]],
          [[1], [1], [1], [1], [1], [0]],
      ]),
  ]

  # Expected contents of the second batch: three real examples followed by
  # two all-zero padding rows that keep the batch dimension at 5.
  second_batch = [
      ("autoregressive_input", [
          [[10], [11], [12], [13], [14], [15]],
          [[10], [11], [12], [13], [14], [15]],
          [[10], [11], [12], [13], [14], [15]],
          [[0], [0], [0], [0], [0], [0]],
          [[0], [0], [0], [0], [0], [0]],
      ]),
      ("conditioning_stack", [
          [[30], [31], [32], [33], [34], [35]],
          [[30], [31], [32], [33], [34], [35]],
          [[30], [31], [32], [33], [34], [35]],
          [[0], [0], [0], [0], [0], [0]],
          [[0], [0], [0], [0], [0], [0]],
      ]),
      ("weights", [
          [[1], [1], [1], [1], [1], [1]],
          [[1], [1], [1], [1], [1], [1]],
          [[1], [1], [1], [1], [1], [1]],
          [[0], [0], [0], [0], [0], [0]],
          [[0], [0], [0], [0], [0], [0]],
      ]),
  ]

  with self.test_session() as sess:
    for expected_batch in (first_batch, second_batch):
      features = sess.run(next_features)
      for feature_name, expected_values in expected_batch:
        np.testing.assert_almost_equal(expected_values,
                                       features[feature_name])

    # No further batches are available once the padded batch is consumed.
    with self.assertRaises(tf.errors.OutOfRangeError):
      sess.run(next_features)
# Run every test case in this module when it is executed as a script.
if __name__ == "__main__":
  tf.test.main()
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Kepler light curve inputs to the AstroWaveNet model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from astrowavenet.data import base
# Names of the tf.Example features read by KeplerLightCurves below.
# COND_INPUT_KEY is parsed as int64 and becomes the "conditioning_stack".
COND_INPUT_KEY = "mask"
# AR_INPUT_KEY is parsed as float32 and becomes the "autoregressive_input".
AR_INPUT_KEY = "flux"
class KeplerLightCurves(base.TFRecordDataset):
  """TFRecord-backed builder of Kepler light curve inputs for AstroWaveNet."""

  def create_example_parser(self):
    """Returns a function that parses one serialized tf.Example proto."""

    def _example_parser(serialized):
      """Parses a single tf.Example proto.

      Args:
        serialized: A serialized tf.Example.

      Returns:
        A dict with the flux values under "autoregressive_input" and the mask
        values, converted to float, under "conditioning_stack".
      """
      feature_spec = {
          AR_INPUT_KEY: tf.VarLenFeature(tf.float32),
          COND_INPUT_KEY: tf.VarLenFeature(tf.int64),
      }
      parsed = tf.parse_single_example(serialized, features=feature_spec)

      # VarLenFeature produces SparseTensor objects; only the values are used.
      flux = parsed[AR_INPUT_KEY].values
      mask = tf.to_float(parsed[COND_INPUT_KEY].values)
      return {
          "autoregressive_input": flux,
          "conditioning_stack": mask,
      }

    return _example_parser
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generates synthetic light curves with periodic transit-like dips.
See class docstring below for more information.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
class SyntheticTransitMaker(object):
  """Generates synthetic light curves with periodic transit-like dips.

  These light curves are generated by thresholding noisy sine waves. Each time
  random_light_curve is called, a thresholded sine wave is generated by sampling
  parameters uniformly from the ranges specified below.

  Attributes:
    period_range: A tuple of positive values specifying the range of periods the
      sine waves may take.
    amplitude_range: A tuple of positive values specifying the range of
      amplitudes the sine waves may take.
    threshold_ratio_range: A tuple of values in [0, 1) specifying the range of
      thresholds as a ratio of the sine wave amplitude.
    phase_range: Tuple of values specifying the range of phases the sine wave
      may take as a ratio of the sampled period. E.g. a sampled phase of 0.5
      would translate the sine wave by half of the period. The most common
      reason to override this would be to generate light curves
      deterministically (with e.g. (0,0)).
    noise_sd_range: A tuple of nonnegative values specifying the range of
      standard deviations for the Gaussian noise applied to the sine wave.
  """

  def __init__(self,
               period_range=(0.5, 4),
               amplitude_range=(1, 1),
               threshold_ratio_range=(0, 0.99),
               phase_range=(0, 1),
               noise_sd_range=(0.1, 0.1)):
    """Validates and stores the sampling ranges.

    Args:
      period_range: (start, end) tuple; start must be positive.
      amplitude_range: (start, end) tuple; start must be positive.
      threshold_ratio_range: (start, end) tuple contained in [0, 1).
      phase_range: (start, end) tuple.
      noise_sd_range: (start, end) tuple; start must be nonnegative.

    Raises:
      ValueError: If any range violates the constraints above, or if the end
        of any range is less than its start.
    """
    if threshold_ratio_range[0] < 0 or threshold_ratio_range[1] >= 1:
      raise ValueError("Threshold ratio range must be in [0, 1). Got: {}."
                       .format(threshold_ratio_range))
    if amplitude_range[0] <= 0:
      raise ValueError(
          "Amplitude range must only contain positive numbers. Got: {}.".format(
              amplitude_range))
    if period_range[0] <= 0:
      raise ValueError(
          "Period range must only contain positive numbers. Got: {}.".format(
              period_range))
    if noise_sd_range[0] < 0:
      raise ValueError(
          "Noise standard deviation range must be nonnegative. Got: {}.".format(
              noise_sd_range))

    # Only the range starts are checked above; together with this ordering
    # check that also bounds the range ends from below.
    for (start, end), name in [(period_range, "period"),
                               (amplitude_range, "amplitude"),
                               (threshold_ratio_range, "threshold ratio"),
                               (phase_range, "phase range"),
                               (noise_sd_range, "noise standard deviation")]:
      if end < start:
        raise ValueError(
            "End of {} range may not be less than start. Got: ({}, {})".format(
                name, start, end))

    self.period_range = period_range
    self.amplitude_range = amplitude_range
    self.threshold_ratio_range = threshold_ratio_range
    self.phase_range = phase_range
    self.noise_sd_range = noise_sd_range

  def random_light_curve(self, time, mask_prob=0):
    """Samples parameters and generates a light curve.

    Args:
      time: np.array, x-values to sample from the thresholded sine wave.
      mask_prob: value in [0,1], probability an individual datapoint is set to
        zero

    Returns:
      flux: np.array, values of the masked sampled light curve corresponding to
        the provided time array.
      mask: np.array of float ones and zeros, with zeros indicating masking at
        the respective position on the flux array.
    """
    period = np.random.uniform(*self.period_range)
    phase = np.random.uniform(*self.phase_range) * period
    amplitude = np.random.uniform(*self.amplitude_range)
    threshold = np.random.uniform(*self.threshold_ratio_range) * amplitude

    sin_wave = np.sin(time / period - phase) * amplitude
    # Keep only the part of the wave below -threshold and shift it up, so the
    # baseline sits at zero and the remaining troughs form the dips.
    flux = np.minimum(sin_wave, -threshold) + threshold

    noise_sd = np.random.uniform(*self.noise_sd_range)
    noise = np.random.normal(scale=noise_sd, size=(len(time),))
    flux += noise

    # Array of ones and zeros, where zeros indicate masking.
    mask = np.random.random(len(time)) > mask_prob
    # The np.float alias was removed in NumPy 1.24; the builtin float yields
    # the same float64 dtype on every NumPy version.
    mask = mask.astype(float)
    return flux * mask, mask

  def random_light_curve_generator(self, time, mask_prob=0):
    """Returns a generator function yielding random light curves.

    Args:
      time: An np.array of x-values to sample from the thresholded sine wave.
      mask_prob: Value in [0,1], probability an individual datapoint is set to
        zero.

    Returns:
      A zero-argument function that, when called, returns an endless generator
      of (flux, mask) tuples as produced by random_light_curve.
    """

    def generator_fn():
      while True:
        yield self.random_light_curve(time, mask_prob)

    return generator_fn
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for synthetic_transit_maker."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import absltest
import numpy as np
from astrowavenet.data import synthetic_transit_maker
class SyntheticTransitMakerTest(absltest.TestCase):
  """Tests for synthetic_transit_maker.SyntheticTransitMaker."""

  def testBadRangesRaiseExceptions(self):
    """Each invalid range must be rejected with a matching ValueError."""
    bad_arguments = [
        # Period range cannot contain negative values.
        ("Period", {"period_range": (-1, 10)}),
        # Amplitude range cannot contain negative values.
        ("Amplitude", {"amplitude_range": (-10, -1)}),
        # Threshold ratio range must be contained in the half-open interval
        # [0, 1).
        ("Threshold ratio", {"threshold_ratio_range": (0, 1)}),
        # Noise standard deviation range must only contain nonnegative values.
        ("Noise standard deviation", {"noise_sd_range": (-1, 1)}),
    ]
    for expected_regex, kwargs in bad_arguments:
      with self.assertRaisesRegexp(ValueError, expected_regex):
        synthetic_transit_maker.SyntheticTransitMaker(**kwargs)

    # End of range may not be less than start.
    reversed_range = (0.2, 0.1)
    for arg_name in [
        "period_range", "threshold_ratio_range", "amplitude_range",
        "noise_sd_range", "phase_range"
    ]:
      with self.assertRaisesRegexp(ValueError, "may not be less"):
        synthetic_transit_maker.SyntheticTransitMaker(
            **{arg_name: reversed_range})

  def testStochasticLightCurveGeneration(self):
    """Outputs have one entry per time step under the default ranges."""
    maker = synthetic_transit_maker.SyntheticTransitMaker()
    flux, mask = maker.random_light_curve(np.arange(100), mask_prob=0.4)
    self.assertEqual(len(flux), 100)
    self.assertEqual(len(mask), 100)

  def testDeterministicLightCurveGeneration(self):
    """Single-valued ranges and no masking reproduce a known light curve."""
    # Use ranges containing one value for determinism.
    maker = synthetic_transit_maker.SyntheticTransitMaker(
        period_range=(2, 2),
        amplitude_range=(3, 3),
        threshold_ratio_range=(.1, .1),
        phase_range=(0, 0),
        noise_sd_range=(0, 0))
    flux, mask = maker.random_light_curve(np.linspace(0, 100, 100))

    expected_flux = np.array([
        0., 0., 0., 0., 0., 0., 0., -0.85099258, -2.04776251, -2.65829632,
        -2.53014378, -1.69530454, -0.36223792, 0., 0., 0., 0., 0., 0.,
        -0.2110405, -1.57757635, -2.47528153, -2.67999913, -2.14061117,
        -0.9918028, 0., 0., 0., 0., 0., 0., 0., -1.01475559, -2.15534176,
        -2.68282928, -2.46550457, -1.55763357, -0.18591162, 0., 0., 0., 0., 0.,
        0., -0.3870683, -1.71426199, -2.53849461, -2.65395535, -2.03181367,
        -0.82741829, 0., 0., 0., 0., 0., 0., 0., -1.17380391, -2.2541162,
        -2.69666588, -2.39094831, -1.41330116, -0.00784284, 0., 0., 0., 0., 0.,
        0., -0.56063229, -1.84372452, -2.59152891, -2.61731875, -1.91465433,
        -0.65899089, 0., 0., 0., 0., 0., 0., 0., -1.3275672, -2.34373163,
        -2.69975648, -2.30674237, -1.26282489, 0., 0., 0., 0., 0., 0., 0.,
        -0.73111006, -1.9654997, -2.63419424, -2.5702207, -1.78955328,
        -0.48712456
    ])
    np.testing.assert_array_almost_equal(flux, expected_flux)
    np.testing.assert_array_almost_equal(mask, np.ones(100))

  def testRandomLightCurveGenerator(self):
    """The generator keeps yielding (flux, mask) pairs of the right length."""
    maker = synthetic_transit_maker.SyntheticTransitMaker()
    generator_fn = maker.random_light_curve_generator(
        np.linspace(0, 100, 100), mask_prob=0.3)
    light_curves = generator_fn()
    for _ in range(5):
      flux, mask = next(light_curves)
      self.assertEqual(len(flux), 100)
      self.assertEqual(len(mask), 100)
# Run every test case in this module when it is executed as a script.
if __name__ == "__main__":
  absltest.main()
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Synthetic transit inputs to the AstroWaveNet model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tf_util import configdict
from astrowavenet.data import base
from astrowavenet.data import synthetic_transit_maker
def _prepare_wavenet_inputs(light_curve, mask):
  """Packs a light curve and its mask into the AstroWaveNet feature dict.

  Args:
    light_curve: Tensor of flux values.
    mask: Tensor of mask values.

  Returns:
    A dict mapping "autoregressive_input" to the light curve and
    "conditioning_stack" to the mask, each with a trailing dimension added.
  """
  autoregressive_input = tf.expand_dims(light_curve, -1)
  conditioning_stack = tf.expand_dims(mask, -1)
  return {
      "autoregressive_input": autoregressive_input,
      "conditioning_stack": conditioning_stack,
  }
class SyntheticTransits(base.DatasetBuilder):
  """Dataset builder that feeds generated synthetic transits to AstroWaveNet."""

  @staticmethod
  def default_config():
    """Returns the default configuration for the synthetic light curves."""
    defaults = {
        "period_range": (0.5, 4),
        "amplitude_range": (1, 1),
        "threshold_ratio_range": (0, 0.99),
        "phase_range": (0, 1),
        "noise_sd_range": (0.1, 0.1),
        "mask_probability": 0.1,
        "light_curve_time_range": (0, 100),
        "light_curve_num_points": 1000
    }
    return configdict.ConfigDict(defaults)

  def build(self, batch_size):
    """Builds the input pipeline.

    Args:
      batch_size: Number of light curves per batch.

    Returns:
      A tf.data.Dataset of batched feature dicts, drawn from an endless
      generator of random light curves.
    """
    config = self.config
    maker = synthetic_transit_maker.SyntheticTransitMaker(
        period_range=config.period_range,
        amplitude_range=config.amplitude_range,
        threshold_ratio_range=config.threshold_ratio_range,
        phase_range=config.phase_range,
        noise_sd_range=config.noise_sd_range)

    # Every light curve is sampled on the same evenly spaced time grid.
    t_start, t_end = config.light_curve_time_range
    time = np.linspace(t_start, t_end, config.light_curve_num_points)
    generator_fn = maker.random_light_curve_generator(
        time, mask_prob=config.mask_probability)

    # The generator yields (flux, mask) pairs, each one value per time step.
    flux_shape = tf.TensorShape((config.light_curve_num_points,))
    mask_shape = tf.TensorShape((config.light_curve_num_points,))
    dataset = tf.data.Dataset.from_generator(
        generator_fn,
        output_types=(tf.float32, tf.float32),
        output_shapes=(flux_shape, mask_shape))

    dataset = dataset.map(_prepare_wavenet_inputs)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    return dataset.prefetch(-1)
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script for training and evaluating AstroWaveNet models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os.path
from absl import flags
import tensorflow as tf
from astrowavenet import astrowavenet_model
from astrowavenet import configurations
from astrowavenet.data import kepler_light_curves
from astrowavenet.data import synthetic_transits
from astrowavenet.util import estimator_util
from tf_util import config_util
from tf_util import configdict
from tf_util import estimator_runner
# Command-line flags for the trainer. "dataset", "model_dir" and "schedule"
# are marked as required in the __main__ section of this file.
FLAGS = flags.FLAGS

# Input data and model selection.
flags.DEFINE_enum("dataset", None,
                  ["synthetic_transits", "kepler_light_curves"],
                  "Dataset for training and/or evaluation.")
flags.DEFINE_string("model_dir", None, "Base output directory.")
flags.DEFINE_string(
    "train_files", None,
    "Comma-separated list of file patterns matching the TFRecord files in the "
    "training dataset.")
flags.DEFINE_string(
    "eval_files", None,
    "Comma-separated list of file patterns matching the TFRecord files in the "
    "evaluation dataset.")
flags.DEFINE_string("config_name", "base",
                    "Name of the AstroWaveNet configuration.")
flags.DEFINE_string(
    "config_overrides", "{}",
    "JSON string or JSON file containing overrides to the base configuration.")

# Run schedule and step counts.
flags.DEFINE_enum("schedule", None,
                  ["train", "train_and_eval", "continuous_eval"],
                  "Schedule for running the model.")
flags.DEFINE_string("eval_name", "val", "Name of the evaluation task.")
flags.DEFINE_integer("train_steps", None, "Total number of steps for training.")
flags.DEFINE_integer("eval_steps", None, "Number of steps for each evaluation.")
flags.DEFINE_integer(
    "local_eval_frequency", 1000,
    "The number of training steps in between evaluation runs. Only applies "
    "when schedule == 'train_and_eval'.")

# Summary and checkpoint settings; these are forwarded to the RunConfig.
# The two save_checkpoints_* flags share the same help text; judging by their
# names one counts training steps and the other seconds.
flags.DEFINE_integer("save_summary_steps", None,
                     "The frequency at which to save model summaries.")
flags.DEFINE_integer("save_checkpoints_steps", None,
                     "The frequency at which to save model checkpoints.")
flags.DEFINE_integer("save_checkpoints_secs", None,
                     "The frequency at which to save model checkpoints.")
flags.DEFINE_integer("keep_checkpoint_max", 1,
                     "The maximum number of model checkpoints to keep.")
# ------------------------------------------------------------------------------
# TPU-only flags
# ------------------------------------------------------------------------------
flags.DEFINE_boolean("use_tpu", False, "Whether to execute on TPU.")
flags.DEFINE_string("master", None, "Address of the TensorFlow TPU master.")
flags.DEFINE_integer("tpu_num_shards", 8, "Number of TPU shards.")
flags.DEFINE_integer("tpu_iterations_per_loop", 1000,
                     "Number of iterations per TPU training loop.")
flags.DEFINE_integer(
    "eval_batch_size", None,
    "Batch size for TPU evaluation. Defaults to the training batch size.")
def _create_run_config():
  """Builds the Estimator run configuration from the command-line flags.

  Returns:
    A tf.contrib.tpu.RunConfig if FLAGS.use_tpu is True, otherwise a
    tf.estimator.RunConfig.

  Raises:
    ValueError: If FLAGS.master is missing when running on TPU, or is set when
      not running on TPU.
  """
  common_kwargs = {
      "save_summary_steps": FLAGS.save_summary_steps,
      "save_checkpoints_steps": FLAGS.save_checkpoints_steps,
      "save_checkpoints_secs": FLAGS.save_checkpoints_secs,
      "session_config": tf.ConfigProto(allow_soft_placement=True),
      "keep_checkpoint_max": FLAGS.keep_checkpoint_max
  }

  if not FLAGS.use_tpu:
    if FLAGS.master:
      raise ValueError("FLAGS.master should only be set for TPUEstimator.")
    return tf.estimator.RunConfig(**common_kwargs)

  if not FLAGS.master:
    raise ValueError("FLAGS.master must be set for TPUEstimator.")

  num_shards = FLAGS.tpu_num_shards
  tpu_config = tf.contrib.tpu.TPUConfig(
      iterations_per_loop=FLAGS.tpu_iterations_per_loop,
      num_shards=num_shards,
      per_host_input_for_training=(num_shards <= 8))
  return tf.contrib.tpu.RunConfig(
      tpu_config=tpu_config, master=FLAGS.master, **common_kwargs)
def _get_file_pattern(mode):
  """Returns the file pattern flag value that applies to the given mode.

  Args:
    mode: A tf.estimator.ModeKeys value. TRAIN selects --train_files; any
      other mode selects --eval_files.

  Returns:
    The value of the selected flag.

  Raises:
    ValueError: If the selected flag was not set.
  """
  if mode == tf.estimator.ModeKeys.TRAIN:
    flag_name = "train_files"
  else:
    flag_name = "eval_files"

  pattern = FLAGS[flag_name].value
  if pattern is not None:
    return pattern
  raise ValueError("--{} is required for mode '{}'".format(flag_name, mode))
def _create_dataset_builder(mode, config_overrides=None):
  """Instantiates the dataset builder selected by --dataset.

  Args:
    mode: A tf.estimator.ModeKeys value.
    config_overrides: Optional overrides to the builder's default config.

  Returns:
    A dataset builder instance.

  Raises:
    ValueError: If the required file pattern flag is unset, or if the dataset
      is not supported.
  """
  dataset_name = FLAGS.dataset

  # Synthetic data is generated on the fly and needs no input files.
  if dataset_name == "synthetic_transits":
    return synthetic_transits.SyntheticTransits(config_overrides)

  # Every remaining dataset reads from files, so resolve the pattern first.
  file_pattern = _get_file_pattern(mode)

  file_based_builders = {
      "kepler_light_curves": kepler_light_curves.KeplerLightCurves,
  }
  builder_class = file_based_builders.get(dataset_name)
  if builder_class is None:
    raise ValueError("Unsupported dataset: {}".format(FLAGS.dataset))

  return builder_class(
      file_pattern,
      mode,
      config_overrides=config_overrides,
      use_tpu=FLAGS.use_tpu)
def _create_input_fn(mode, config_overrides=None):
  """Builds an Estimator input_fn for the given mode.

  Args:
    mode: A tf.estimator.ModeKeys value.
    config_overrides: Optional overrides to the dataset builder's config.

  Returns:
    The input_fn created from the dataset builder.
  """
  dataset_builder = _create_dataset_builder(mode, config_overrides)
  dataset_config_json = config_util.to_json(dataset_builder.config)
  tf.logging.info("Dataset config for mode '%s': %s", mode,
                  dataset_config_json)
  return estimator_util.create_input_fn(dataset_builder)
def _create_eval_args(config_overrides=None):
  """Builds the eval_args dict passed to the estimator_runner functions.

  Args:
    config_overrides: Optional overrides to the dataset builder's config.

  Returns:
    A dict mapping FLAGS.eval_name to an (input_fn, eval_steps) tuple.

  Raises:
    ValueError: If the dataset is "synthetic_transits" and --eval_steps is
      unset or zero.
  """
  eval_steps = FLAGS.eval_steps
  if FLAGS.dataset == "synthetic_transits":
    if not eval_steps:
      raise ValueError(
          "Dataset '{}' requires --eval_steps for evaluation".format(
              FLAGS.dataset))

  eval_input_fn = _create_input_fn(tf.estimator.ModeKeys.EVAL,
                                   config_overrides)
  return {FLAGS.eval_name: (eval_input_fn, eval_steps)}
def _parse_config_overrides(json_string_or_file):
  """Parses --config_overrides given as a JSON string or a JSON file name.

  Args:
    json_string_or_file: A JSON string, or the name of a file containing JSON.

  Returns:
    The parsed JSON value.

  Raises:
    ValueError: If the argument is neither valid JSON nor the name of an
      existing file, or if the file does not contain valid JSON.
  """
  try:
    return json.loads(json_string_or_file)
  except ValueError:
    # Not a JSON literal. Only fall back to reading a file when one exists, so
    # a malformed JSON string still surfaces the original parse error.
    if not tf.gfile.Exists(json_string_or_file):
      raise
  with tf.gfile.Open(json_string_or_file, "r") as f:
    return json.load(f)


def main(argv):
  """Trains and/or evaluates an AstroWaveNet model according to the flags."""
  del argv  # Unused.

  config = configdict.ConfigDict(configurations.get_config(FLAGS.config_name))

  # The flag is documented as accepting a JSON string or a JSON file.
  config_overrides = _parse_config_overrides(FLAGS.config_overrides)
  for key in config_overrides:
    if key not in ["dataset", "hparams"]:
      raise ValueError("Unrecognized config override: {}".format(key))
  config.hparams.update(config_overrides.get("hparams", {}))

  # Log configs.
  configs_json = [
      ("config_overrides", config_util.to_json(config_overrides)),
      ("config", config_util.to_json(config)),
  ]
  for config_name, config_json in configs_json:
    tf.logging.info("%s: %s", config_name, config_json)

  # Create the estimator.
  run_config = _create_run_config()
  estimator = estimator_util.create_estimator(
      astrowavenet_model.AstroWaveNet, config.hparams, run_config,
      FLAGS.model_dir, FLAGS.eval_batch_size)

  if FLAGS.schedule in ["train", "train_and_eval"]:
    # Save configs.
    tf.gfile.MakeDirs(FLAGS.model_dir)
    for config_name, config_json in configs_json:
      filename = os.path.join(FLAGS.model_dir, "{}.json".format(config_name))
      with tf.gfile.Open(filename, "w") as f:
        f.write(config_json)

    train_input_fn = _create_input_fn(tf.estimator.ModeKeys.TRAIN,
                                      config_overrides.get("dataset"))

    train_hooks = []
    if FLAGS.schedule == "train":
      estimator.train(
          train_input_fn, hooks=train_hooks, max_steps=FLAGS.train_steps)
    else:
      assert FLAGS.schedule == "train_and_eval"

      eval_args = _create_eval_args(config_overrides.get("dataset"))
      for _ in estimator_runner.continuous_train_and_eval(
          estimator=estimator,
          train_input_fn=train_input_fn,
          eval_args=eval_args,
          local_eval_frequency=FLAGS.local_eval_frequency,
          train_hooks=train_hooks,
          train_steps=FLAGS.train_steps):
        # continuous_train_and_eval() yields evaluation metrics after each
        # FLAGS.local_eval_frequency. It also saves and logs them, so we don't
        # do anything here.
        pass

  else:
    assert FLAGS.schedule == "continuous_eval"

    eval_args = _create_eval_args(config_overrides.get("dataset"))
    for _ in estimator_runner.continuous_eval(
        estimator=estimator, eval_args=eval_args,
        train_steps=FLAGS.train_steps):
      # continuous_eval() yields evaluation metrics after each checkpoint. It
      # also saves and logs them, so we don't do anything here.
      pass
if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)

  flags.mark_flags_as_required(["dataset", "model_dir", "schedule"])

  def _validate_schedule(flag_values):
    """Checks that training schedules come with a checkpoint frequency.

    Args:
      flag_values: Dict of the values of the flags named in the validator
        registration below.

    Returns:
      True if the flag combination is valid.

    Raises:
      flags.ValidationError: If a training schedule is selected but neither
        --save_checkpoints_steps nor --save_checkpoints_secs is set.
    """
    schedule = flag_values["schedule"]
    is_training = schedule in ["train", "train_and_eval"]
    has_checkpoint_frequency = (
        flag_values["save_checkpoints_steps"] or
        flag_values["save_checkpoints_secs"])
    if is_training and not has_checkpoint_frequency:
      raise flags.ValidationError(
          "--schedule='%s' requires --save_checkpoints_steps or "
          "--save_checkpoints_secs." % schedule)
    return True

  flags.register_multi_flags_validator(
      ["schedule", "save_checkpoints_steps", "save_checkpoints_secs"],
      _validate_schedule)

  tf.app.run()
package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # Apache 2.0
# Helper functions for creating a TensorFlow Estimator.
py_library(
    name = "estimator_util",
    srcs = ["estimator_util.py"],
    srcs_version = "PY2AND3",
    deps = ["//astronet/ops:training"],
)
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions for creating a TensorFlow Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import tensorflow as tf
from astronet.ops import training
class _InputFn(object):
  """Callable wrapper that adapts a dataset builder to an Estimator input_fn."""

  def __init__(self, dataset_builder):
    """Stores the builder used to construct the input pipeline.

    Args:
      dataset_builder: Instance of DatasetBuilder.
    """
    self._builder = dataset_builder

  def __call__(self, params):
    """Builds the input pipeline.

    Args:
      params: Dict containing the key "batch_size".

    Returns:
      Whatever the builder's build() method returns for that batch size.
    """
    batch_size = params["batch_size"]
    return self._builder.build(batch_size=batch_size)


def create_input_fn(dataset_builder):
  """Creates an input_fn that builds an input pipeline.

  Args:
    dataset_builder: Instance of DatasetBuilder.

  Returns:
    A callable that takes a params dict, builds an input pipeline and returns
    a tf.data.Dataset object.
  """
  input_fn = _InputFn(dataset_builder)
  return input_fn
class _ModelFn(object):
  """Callable wrapper that adapts a model class to an Estimator model_fn."""

  def __init__(self, model_class, hparams, use_tpu=False):
    """Stores what is needed to build the model later.

    Args:
      model_class: Model class.
      hparams: Hyperparameters for building and training the model. A deep
        copy is made on every call, so this object is never modified.
      use_tpu: If True, calls return a TPUEstimatorSpec. Otherwise they return
        an EstimatorSpec.
    """
    self._model_class = model_class
    self._base_hparams = hparams
    self._use_tpu = use_tpu

  def __call__(self, features, mode, params):
    """Builds the model and returns an EstimatorSpec or TPUEstimatorSpec.

    Args:
      features: Input features passed through to the model class.
      mode: A tf.estimator.ModeKeys value.
      params: Dict of parameters; "batch_size", if present, overrides
        hparams.batch_size.

    Returns:
      A TPUEstimatorSpec if use_tpu was True at construction, otherwise an
      EstimatorSpec.
    """
    hparams = copy.deepcopy(self._base_hparams)
    if "batch_size" in params:
      hparams.batch_size = params["batch_size"]

    model = self._model_class(features, hparams, mode)
    model.build()

    # A train_op is only needed (and only created) when training.
    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
      learning_rate = training.create_learning_rate(hparams, model.global_step)
      optimizer = training.create_optimizer(hparams, learning_rate,
                                            self._use_tpu)
      train_op = training.create_train_op(model, optimizer)

    if self._use_tpu:
      spec_class = tf.contrib.tpu.TPUEstimatorSpec
    else:
      spec_class = tf.estimator.EstimatorSpec
    return spec_class(mode=mode, loss=model.total_loss, train_op=train_op)


def create_model_fn(model_class, hparams, use_tpu=False):
  """Wraps model_class as an Estimator or TPUEstimator model_fn.

  Args:
    model_class: AstroModel or a subclass.
    hparams: ConfigDict of configuration parameters for building the model.
    use_tpu: If True, a TPUEstimator model_fn is returned. Otherwise an
      Estimator model_fn is returned.

  Returns:
    model_fn: A callable that constructs the model and returns a
        TPUEstimatorSpec if use_tpu is True, otherwise an EstimatorSpec.
  """
  model_fn = _ModelFn(model_class, hparams, use_tpu)
  return model_fn
def create_estimator(model_class,
                     hparams,
                     run_config=None,
                     model_dir=None,
                     eval_batch_size=None):
  """Builds an Estimator or TPUEstimator around model_class.

  The type of run_config selects the estimator: None or a
  tf.estimator.RunConfig produces an Estimator, while a
  tf.contrib.tpu.RunConfig produces a TPUEstimator.

  Args:
    model_class: AstroWaveNet or a subclass.
    hparams: ConfigDict of configuration parameters for building the model.
    run_config: Optional tf.estimator.RunConfig or tf.contrib.tpu.RunConfig.
      The estimator receives a deep copy of it.
    model_dir: Optional directory for saving the model. If not passed
      explicitly, it must be specified in run_config.
    eval_batch_size: Optional batch size for evaluation on TPU. Only applicable
      if run_config is a tf.contrib.tpu.RunConfig. Defaults to
      hparams.batch_size.

  Returns:
    An Estimator object if run_config is None or a tf.estimator.RunConfig, or a
    TPUEstimator object if run_config is a tf.contrib.tpu.RunConfig.

  Raises:
    ValueError:
      If model_dir is not passed explicitly or in run_config.model_dir, or if
      eval_batch_size is specified and run_config is not a
      tf.contrib.tpu.RunConfig.
  """
  if run_config is not None:
    run_config = copy.deepcopy(run_config)
  else:
    run_config = tf.estimator.RunConfig()

  if not (model_dir or run_config.model_dir):
    raise ValueError(
        "model_dir must be passed explicitly or specified in run_config")

  use_tpu = isinstance(run_config, tf.contrib.tpu.RunConfig)
  model_fn = create_model_fn(model_class, hparams, use_tpu)

  if not use_tpu:
    # A separate evaluation batch size only exists for TPUEstimator.
    if eval_batch_size is not None:
      raise ValueError("eval_batch_size can only be specified for TPU.")
    return tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=model_dir,
        config=run_config,
        params={"batch_size": hparams.batch_size})

  tpu_eval_batch_size = eval_batch_size or hparams.batch_size
  return tf.contrib.tpu.TPUEstimator(
      model_fn=model_fn,
      model_dir=model_dir,
      config=run_config,
      train_batch_size=hparams.batch_size,
      eval_batch_size=tpu_eval_batch_size)
# Bazel targets for the light curve Python modules and their unit tests.
# Every target in this package is publicly visible.
package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # Apache 2.0
# Library for kepler_io.py; depends on the shared :util target.
py_library(
name = "kepler_io",
srcs = ["kepler_io.py"],
srcs_version = "PY2AND3",
deps = [":util"],
)
# Unit test for :kepler_io. The kplr*.fits files under
# test_data/0114/011442793 are made available to the test as data.
py_test(
name = "kepler_io_test",
size = "small",
srcs = ["kepler_io_test.py"],
data = glob([
"test_data/0114/011442793/kplr*.fits",
]),
srcs_version = "PY2AND3",
deps = [":kepler_io"],
)
# Library for median_filter.py; declares no dependencies on other targets.
py_library(
name = "median_filter",
srcs = ["median_filter.py"],
srcs_version = "PY2AND3",
)
# Unit test for :median_filter.
py_test(
name = "median_filter_test",
size = "small",
srcs = ["median_filter_test.py"],
srcs_version = "PY2AND3",
deps = [":median_filter"],
)
# Library for periodic_event.py; declares no dependencies on other targets.
py_library(
name = "periodic_event",
srcs = ["periodic_event.py"],
srcs_version = "PY2AND3",
)
# Unit test for :periodic_event.
py_test(
name = "periodic_event_test",
size = "small",
srcs = ["periodic_event_test.py"],
srcs_version = "PY2AND3",
deps = [":periodic_event"],
)
# Library for util.py; declares no dependencies on other targets.
py_library(
name = "util",
srcs = ["util.py"],
srcs_version = "PY2AND3",
)
# Unit test for :util; it additionally depends on :periodic_event.
py_test(
name = "util_test",
size = "small",
srcs = ["util_test.py"],
srcs_version = "PY2AND3",
deps = [
":periodic_event",
":util",
],
)
# Light Curve Operations
## Code Author
Chris Shallue: [@cshallue](https://github.com/cshallue)
## Python modules
* `kepler_io`: Functions for reading Kepler data.
* `median_filter`: Utility for smoothing data using a median filter.
* `periodic_event`: Event class, which represents a periodic event in a light curve.
* `util`: Light curve utility functions.
## Fast ops
The [fast_ops](fast_ops/) subdirectory contains optimized C++ light curve
operations. These operations can be compiled for Python using
[CLIF](https://github.com/google/clif). The [fast_ops/python](fast_ops/python/)
directory contains CLIF API description files.
# Copyright 2018 The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Bazel targets for the C++ light curve operations and their unit tests.
# Every target in this package is publicly visible.
package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # Apache 2.0
# Header-only library: exposes median.h and has no srcs.
cc_library(
name = "median",
hdrs = ["median.h"],
)
# Unit test for :median.
cc_test(
name = "median_test",
size = "small",
srcs = [
"median_test.cc",
],
deps = [
":median",
"@com_google_googletest//:gtest_main",
],
)
# Median filter library; built on :median and Abseil strings.
cc_library(
name = "median_filter",
srcs = ["median_filter.cc"],
hdrs = ["median_filter.h"],
deps = [
":median",
"@com_google_absl//absl/strings",
],
)
# Unit test for :median_filter; uses the shared :test_util helpers.
cc_test(
name = "median_filter_test",
size = "small",
srcs = [
"median_filter_test.cc",
],
deps = [
":median_filter",
":test_util",
"@com_google_googletest//:gtest_main",
],
)
# Library for phase_fold.cc / phase_fold.h; depends only on Abseil strings.
cc_library(
name = "phase_fold",
srcs = ["phase_fold.cc"],
hdrs = ["phase_fold.h"],
deps = ["@com_google_absl//absl/strings"],
)
# Unit test for :phase_fold; uses the shared :test_util helpers.
cc_test(
name = "phase_fold_test",
size = "small",
srcs = [
"phase_fold_test.cc",
],
deps = [
":phase_fold",
":test_util",
"@com_google_googletest//:gtest_main",
],
)
# Library for normalize.cc / normalize.h; built on :median and Abseil strings.
cc_library(
name = "normalize",
srcs = ["normalize.cc"],
hdrs = ["normalize.h"],
deps = [
":median",
"@com_google_absl//absl/strings",
],
)
# Unit test for :normalize; uses the shared :test_util helpers.
cc_test(
name = "normalize_test",
size = "small",
srcs = [
"normalize_test.cc",
],
deps = [
":normalize",
":test_util",
"@com_google_googletest//:gtest_main",
],
)
# Library for view_generator.cc / view_generator.h; combines :median_filter,
# :normalize and :phase_fold, plus Abseil memory.
cc_library(
name = "view_generator",
srcs = ["view_generator.cc"],
hdrs = ["view_generator.h"],
deps = [
":median_filter",
":normalize",
":phase_fold",
"@com_google_absl//absl/memory",
],
)
# Unit test for :view_generator; uses the shared :test_util helpers.
cc_test(
name = "view_generator_test",
size = "small",
srcs = [
"view_generator_test.cc",
],
deps = [
":test_util",
":view_generator",
"@com_google_googletest//:gtest_main",
],
)
# Header-only helpers shared by the tests above; depends on googletest.
cc_library(
name = "test_util",
hdrs = ["test_util.h"],
deps = [
"@com_google_googletest//:gtest",
],
)
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_MODELS_ASTRONET_LIGHT_CURVE_FAST_OPS_MEDIAN_H_
#define TENSORFLOW_MODELS_ASTRONET_LIGHT_CURVE_FAST_OPS_MEDIAN_H_
#include <algorithm>
#include <iterator>
#include <vector>
namespace astronet {
// Returns the median of the values in [first, last), reordering the range.
//
// The range must be non-empty: the result is read by dereferencing iterators
// into it. For an even number of elements the result is the mean of the two
// middle values, computed with value_type's own division.
//
// On return, with middle = first + distance(first, last) / 2:
//   1. *middle holds the element that would be at that position if
//      [first, last) were fully sorted.
//   2. No element before middle is greater than any element after it.
template <class RandomIt>
typename std::iterator_traits<RandomIt>::value_type InPlaceMedian(
    RandomIt first, RandomIt last) {
  // For an odd count 'upper' is the single middle element; for an even count
  // it is the upper of the two middle elements.
  const auto count = std::distance(first, last);
  const RandomIt upper = first + count / 2;

  // Partial sort: puts the correct element at 'upper' and partitions the rest
  // around it.
  std::nth_element(first, upper, last);

  if (count % 2 != 0) {
    return *upper;  // Odd count: the middle element is the median.
  }

  // Even count: nth_element left everything not greater than *upper in
  // [first, upper), so the lower middle element is the largest value there.
  const RandomIt lower = std::max_element(first, upper);

  // Average the two middle values without overflowing. *lower <= *upper always
  // holds; when they lie on opposite sides of zero their sum cannot overflow,
  // and otherwise their difference cannot.
  const bool straddles_zero = (*lower <= 0) && (*upper >= 0);
  if (straddles_zero) {
    return (*lower + *upper) / 2;
  }
  return *lower + (*upper - *lower) / 2;
}
// Returns the median of the values in [first, last), leaving the input
// untouched. The values are copied into a temporary vector and the median is
// computed on that copy with InPlaceMedian.
template <class ForwardIterator>
typename std::iterator_traits<ForwardIterator>::value_type Median(
    ForwardIterator first, ForwardIterator last) {
  using Value = typename std::iterator_traits<ForwardIterator>::value_type;
  std::vector<Value> scratch(first, last);
  return InPlaceMedian(scratch.begin(), scratch.end());
}
} // namespace astronet
#endif // TENSORFLOW_MODELS_ASTRONET_LIGHT_CURVE_FAST_OPS_MEDIAN_H_
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "light_curve/fast_ops/median_filter.h"
#include "absl/strings/substitute.h"
#include "light_curve/fast_ops/median.h"
using absl::Substitute;
using std::min;
using std::vector;
namespace astronet {
// Computes the median of y in num_bins uniformly spaced bins along the x-axis.
//
// Bin i covers [x_min + i * spacing, x_min + i * spacing + bin_width), where
// spacing = (x_max - x_min - bin_width) / (num_bins - 1). A bin that contains
// no points receives a fallback value: the median of the y-values whose
// x-values lie between x_min and x_max or, if there are no such points at all,
// the median of every y-value.
//
// x must be sorted in ascending order. Arguments are validated first; on an
// invalid argument *error is set, *result is left untouched and false is
// returned. On success *result is resized to num_bins and true is returned.
bool MedianFilter(const vector<double>& x, const vector<double>& y,
                  int num_bins, double bin_width, double x_min, double x_max,
                  vector<double>* result, std::string* error) {
  const std::size_t x_size = x.size();
  if (x_size < 2) {
    *error = Substitute("x.size() must be greater than 1. Got: $0", x_size);
    return false;
  }
  if (x_size != y.size()) {
    *error = Substitute("x.size() (got: $0) must equal y.size() (got: $1)",
                        x_size, y.size());
    return false;
  }
  const double x_first = x[0];
  const double x_last = x[x_size - 1];
  if (x_first >= x_last) {
    *error = Substitute(
        "The first element of x (got: $0) must be less than the last "
        "element (got: $1). Either x is not sorted or all elements are "
        "equal.",
        x_first, x_last);
    return false;
  }
  if (x_min >= x_max) {
    *error = Substitute("x_min (got: $0) must be less than x_max (got: $1)",
                        x_min, x_max);
    return false;
  }
  if (x_min > x_last) {
    *error = Substitute(
        "x_min (got: $0) must be less than or equal to the largest value of x "
        "(got: $1)",
        x_min, x_last);
    return false;
  }
  if (bin_width <= 0) {
    *error = Substitute("bin_width must be positive. Got: $0", bin_width);
    return false;
  }
  if (bin_width >= x_max - x_min) {
    *error = Substitute(
        "bin_width (got: $0) must be less than x_max - x_min (got: $1)",
        bin_width, x_max - x_min);
    return false;
  }
  if (num_bins < 2) {
    *error = Substitute("num_bins must be greater than 1. Got: $0", num_bins);
    return false;
  }

  result->resize(num_bins);

  // Compute the spacing between midpoints of adjacent bins.
  const double bin_spacing = (x_max - x_min - bin_width) / (num_bins - 1);

  // Create a vector to hold the values of the current bin on each iteration.
  // Its initial size is twice the expected number of points per bin if x
  // values are uniformly spaced. It will be expanded as necessary.
  const int points_per_bin =
      1 + static_cast<int>(x_size * min(1.0, bin_width / (x_last - x_first)));
  vector<double> bin_values(2 * points_per_bin);

  // Create a vector to hold the indices of any empty bins.
  vector<int> empty_bins;

  // Find the first element of x >= x_min. This loop is guaranteed to produce
  // a valid index because we know that x_min <= x_last.
  std::size_t x_start = 0;
  while (x[x_start] < x_min) ++x_start;

  // The bin at index i is the median of all elements y[j] such that
  // bin_min <= x[j] < bin_max, where bin_min and bin_max are the endpoints of
  // bin i. Indices are std::size_t so that comparisons against x_size and
  // bin_values.size() do not mix signed and unsigned values.
  double bin_min = x_min;              // Left endpoint of the current bin.
  double bin_max = x_min + bin_width;  // Right endpoint of the current bin.
  std::size_t j_start = x_start;  // Index of the first element in the bin.
  std::size_t j = x_start;        // Index of the current element in the bin.
  for (int i = 0; i < num_bins; ++i) {
    // Move j_start to the first index of x >= bin_min.
    while (j_start < x_size && x[j_start] < bin_min) ++j_start;

    // Accumulate values y[j] such that bin_min <= x[j] < bin_max. After this
    // loop, j is the exclusive end index of the current bin.
    j = j_start;
    while (j < x_size && x[j] < bin_max) {
      if (j - j_start >= bin_values.size()) {
        bin_values.resize(2 * bin_values.size());  // Expand if necessary.
      }
      bin_values[j - j_start] = y[j];
      ++j;
    }

    const std::size_t n = j - j_start;  // Number of points in the bin.
    if (n == 0) {
      empty_bins.push_back(i);  // Empty bin.
    } else {
      // Compute and insert the median bin value.
      (*result)[i] = InPlaceMedian(bin_values.begin(), bin_values.begin() + n);
    }

    // Advance the bin.
    bin_min += bin_spacing;
    bin_max += bin_spacing;
  }

  // For empty bins, fall back to the median y value between x_min and x_max.
  if (!empty_bins.empty()) {
    double median;
    if (j > x_start) {
      median = Median(y.begin() + x_start, y.begin() + j);
    } else {
      // No point lies in [x_min, x_max) at all (every bin is empty), so the
      // range above would be empty and taking its median would dereference an
      // end iterator. Use the median of all y-values instead; y is known to
      // be non-empty here.
      median = Median(y.begin(), y.end());
    }
    for (int i : empty_bins) {
      (*result)[i] = median;
    }
  }
  return true;
}
} // namespace astronet
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_MODELS_ASTRONET_LIGHT_CURVE_FAST_OPS_MEDIAN_FILTER_H_
#define TENSORFLOW_MODELS_ASTRONET_LIGHT_CURVE_FAST_OPS_MEDIAN_FILTER_H_
#include <iostream>
#include <string>
#include <vector>
namespace astronet {
// Computes the median y-value in uniform intervals (bins) along the x-axis.
//
// The interval [x_min, x_max) is divided into num_bins uniformly spaced
// intervals of width bin_width. The value computed for each bin is the median
// of all y-values whose corresponding x-value is in the interval.
//
// The first bin starts at x_min and each following bin is shifted by
// (x_max - x_min - bin_width) / (num_bins - 1), so adjacent bins overlap when
// bin_width is larger than that shift and leave gaps when it is smaller. A bin
// that contains no points is assigned a fallback value: the median of the
// y-values between x_min and x_max.
//
// NOTE: x must be sorted in ascending order or the results will be incorrect.
//
// Input args:
// x: Vector of x-coordinates sorted in ascending order. Must have at least 2
// elements, and all elements cannot be the same value.
// y: Vector of y-coordinates with the same size as x.
// num_bins: The number of intervals to divide the x-axis into. Must be at
// least 2.
// bin_width: The width of each bin on the x-axis. Must be positive, and less
// than x_max - x_min.
// x_min: The inclusive leftmost value to consider on the x-axis. Must be less
// than or equal to the largest value of x.
// x_max: The exclusive rightmost value to consider on the x-axis. Must be
// greater than x_min.
//
// Output args:
// result: Vector of size num_bins containing the median y-values of uniformly
// spaced bins on the x-axis. It is resized to num_bins only after all
// arguments have been validated, so it is left untouched on failure.
// error: String indicating an error (e.g. an invalid argument). It is only
// written when the function returns false.
//
// Returns:
// true if the algorithm succeeded. If false, see "error".
bool MedianFilter(const std::vector<double>& x, const std::vector<double>& y,
int num_bins, double bin_width, double x_min, double x_max,
std::vector<double>* result, std::string* error);
} // namespace astronet
#endif // TENSORFLOW_MODELS_ASTRONET_LIGHT_CURVE_FAST_OPS_MEDIAN_FILTER_H_
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "light_curve/fast_ops/median_filter.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "light_curve/fast_ops/test_util.h"
using std::vector;
using testing::Pointwise;
namespace astronet {
namespace {
// Checks that each invalid argument is rejected with its own error message.
TEST(MedianFilter, Errors) {
  vector<double> xs;
  vector<double> ys;
  vector<double> output;
  std::string message;

  // Fewer than two x values.
  xs = {1};
  ys = {2};
  EXPECT_FALSE(MedianFilter(xs, ys, 2, 1, 0, 2, &output, &message));
  EXPECT_EQ(message, "x.size() must be greater than 1. Got: 1");

  // Mismatched x and y sizes.
  xs = {1, 2};
  ys = {4, 5, 6};
  EXPECT_FALSE(MedianFilter(xs, ys, 2, 1, 0, 2, &output, &message));
  EXPECT_EQ(message, "x.size() (got: 2) must equal y.size() (got: 3)");

  // Unsorted x. From here on ys keeps its three elements.
  xs = {2, 0, 1};
  EXPECT_FALSE(MedianFilter(xs, ys, 2, 1, 0, 2, &output, &message));
  EXPECT_EQ(message,
            "The first element of x (got: 2) must be less than the last element"
            " (got: 1). Either x is not sorted or all elements are equal.");

  // Constant x.
  xs = {1, 1, 1};
  EXPECT_FALSE(MedianFilter(xs, ys, 2, 1, 0, 2, &output, &message));
  EXPECT_EQ(message,
            "The first element of x (got: 1) must be less than the last element"
            " (got: 1). Either x is not sorted or all elements are equal.");

  // x_min equal to x_max.
  xs = {1, 2, 3};
  EXPECT_FALSE(MedianFilter(xs, ys, 2, 1, -1, -1, &output, &message));
  EXPECT_EQ(message, "x_min (got: -1) must be less than x_max (got: -1)");

  // x_min beyond the last element of x.
  xs = {1, 2, 3};
  EXPECT_FALSE(MedianFilter(xs, ys, 2, 0.25, 3.5, 4, &output, &message));
  EXPECT_EQ(message,
            "x_min (got: 3.5) must be less than or equal to the largest value "
            "of x (got: 3)");

  // Zero bin_width.
  xs = {1, 2, 3};
  EXPECT_FALSE(MedianFilter(xs, ys, 2, 0, 1, 3, &output, &message));
  EXPECT_EQ(message, "bin_width must be positive. Got: 0");

  // bin_width as large as x_max - x_min.
  xs = {1, 2, 3};
  EXPECT_FALSE(MedianFilter(xs, ys, 2, 1, 1.5, 2.5, &output, &message));
  EXPECT_EQ(message,
            "bin_width (got: 1) must be less than x_max - x_min (got: 1)");

  // A single bin.
  xs = {1, 2, 3};
  EXPECT_FALSE(MedianFilter(xs, ys, 1, 1, 0, 2, &output, &message));
  EXPECT_EQ(message, "num_bins must be greater than 1. Got: 1");
}
// Five adjacent bins of width 2 over [-5, 5): each bin includes its left edge
// and excludes its right edge, so every bin holds exactly two points.
TEST(MedianFilter, BucketBoundaries) {
  const vector<double> xs = {-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6};
  const vector<double> ys = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13};
  vector<double> output;
  std::string message;

  EXPECT_TRUE(MedianFilter(xs, ys, 5, 2, -5, 5, &output, &message));
  EXPECT_TRUE(message.empty());

  const vector<double> want = {2.5, 4.5, 6.5, 8.5, 10.5};
  EXPECT_THAT(output, Pointwise(DoubleNear(), want));
}
// Unit-width bins over [0, 7) that hold 0, 1, 2, 3, 4, 5 and 10 points.
TEST(MedianFilter, MultiSizeBins) {
  const vector<double> xs = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5,
                             5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6};
  const vector<double> ys = {0, -1, 1, 4, 5, 6, 2, 2, 4,  4, 1, 1, 1,
                             1, -1, 1, 2, 3, 4, 5, 6, 7,  8, 9, 10};
  vector<double> output;
  std::string message;

  EXPECT_TRUE(MedianFilter(xs, ys, 7, 1, 0, 7, &output, &message));
  EXPECT_TRUE(message.empty());

  // The first bin is empty, so want[0] = 3 is the median of y.
  const vector<double> want = {3, 0, 0, 5, 3, 1, 5.5};
  EXPECT_THAT(output, Pointwise(DoubleNear(), want));
}
// Bins that contain no points take the fallback median of y.
TEST(MedianFilter, EmptyBins) {
  const vector<double> xs = {-1, 0, 1};
  const vector<double> ys = {2, 3, 1};
  vector<double> output;
  std::string message;

  EXPECT_TRUE(MedianFilter(xs, ys, 5, 1, -5, 5, &output, &message));
  EXPECT_TRUE(message.empty());

  // Only the center bin holds a point; the other four use the fallback.
  const vector<double> want = {2, 2, 3, 2, 2};
  EXPECT_THAT(output, Pointwise(DoubleNear(), want));
}
// Bins of width 5 spaced 2.5 apart over [-10, 10), so neighbouring bins
// overlap.
TEST(MedianFilter, WideBins) {
  const vector<double> xs = {-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6};
  const vector<double> ys = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13};
  vector<double> output;
  std::string message;

  EXPECT_TRUE(MedianFilter(xs, ys, 7, 5, -10, 10, &output, &message));
  EXPECT_TRUE(message.empty());

  const vector<double> want = {1, 2.5, 4, 7, 9, 11.5, 12.5};
  EXPECT_THAT(output, Pointwise(DoubleNear(), want));
}
// Bins of width 0.5 over [-2.25, 2.25): narrower than the spacing of x, so
// every other bin contains no point.
TEST(MedianFilter, NarrowBins) {
  const vector<double> xs = {-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6};
  const vector<double> ys = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13};
  vector<double> output;
  std::string message;

  EXPECT_TRUE(MedianFilter(xs, ys, 9, 0.5, -2.25, 2.25, &output, &message));
  EXPECT_TRUE(message.empty());

  // Bins 1, 3, 5 and 7 are empty and take the fallback median.
  const vector<double> want = {5, 7, 6, 7, 7, 7, 8, 7, 9};
  EXPECT_THAT(output, Pointwise(DoubleNear(), want));
}
} // namespace
} // namespace astronet
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment