Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
ddee8c74
Commit
ddee8c74
authored
Jun 23, 2018
by
Chris Shallue
Committed by
Christopher Shallue
Jun 23, 2018
Browse files
Modularize light curve and TCE preprocessing functions for easier re-use.
PiperOrigin-RevId: 201839789
parent
672ac40b
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
109 additions
and
73 deletions
+109
-73
research/astronet/astronet/data/BUILD
research/astronet/astronet/data/BUILD
+2
-1
research/astronet/astronet/data/generate_input_records.py
research/astronet/astronet/data/generate_input_records.py
+4
-51
research/astronet/astronet/data/preprocess.py
research/astronet/astronet/data/preprocess.py
+57
-19
research/astronet/third_party/kepler_spline/kepler_spline.py
research/astronet/third_party/kepler_spline/kepler_spline.py
+46
-2
No files found.
research/astronet/astronet/data/BUILD
View file @
ddee8c74
...
...
@@ -8,10 +8,11 @@ py_binary(
deps
=
[
":preprocess"
],
)
py_
bin
ary
(
py_
libr
ary
(
name
=
"preprocess"
,
srcs
=
[
"preprocess.py"
],
deps
=
[
"//astronet/util:example_util"
,
"//light_curve_util:kepler_io"
,
"//light_curve_util:median_filter"
,
"//light_curve_util:util"
,
...
...
research/astronet/astronet/data/generate_input_records.py
View file @
ddee8c74
...
...
@@ -131,25 +131,6 @@ _LABEL_COLUMN = "av_training_set"
_ALLOWED_LABELS
=
{
"PC"
,
"AFP"
,
"NTP"
}
def _set_float_feature(ex, name, value):
  """Stores a sequence of values as a float feature of an Example proto.

  Args:
    ex: A tensorflow.train.Example proto; it is modified in place.
    name: Name of the feature to set. It must not already be present in `ex`.
    value: Iterable of values; each element is converted with float().
  """
  feature_map = ex.features.feature
  assert name not in feature_map, "Duplicate feature: %s" % name
  # Resolve the destination list first, then convert and append the values.
  float_values = feature_map[name].float_list.value
  float_values.extend([float(v) for v in value])
def _set_bytes_feature(ex, name, value):
  """Stores a sequence of values as a bytes feature of an Example proto.

  Args:
    ex: A tensorflow.train.Example proto; it is modified in place.
    name: Name of the feature to set. It must not already be present in `ex`.
    value: Iterable of values; each element is converted with str() and then
      encoded as latin-1.
  """
  feature_map = ex.features.feature
  assert name not in feature_map, "Duplicate feature: %s" % name
  # Resolve the destination list first, then encode and append the values.
  bytes_values = feature_map[name].bytes_list.value
  bytes_values.extend([str(v).encode("latin-1") for v in value])
def _set_int64_feature(ex, name, value):
  """Stores a sequence of values as an int64 feature of an Example proto.

  Args:
    ex: A tensorflow.train.Example proto; it is modified in place.
    name: Name of the feature to set. It must not already be present in `ex`.
    value: Iterable of values; each element is converted with int().
  """
  feature_map = ex.features.feature
  assert name not in feature_map, "Duplicate feature: %s" % name
  # Resolve the destination list first, then convert and append the values.
  int64_values = feature_map[name].int64_list.value
  int64_values.extend([int(v) for v in value])
def
_process_tce
(
tce
):
"""Processes the light curve for a Kepler TCE and returns an Example proto.
...
...
@@ -158,39 +139,11 @@ def _process_tce(tce):
Returns:
A tensorflow.train.Example proto containing TCE features.
Raises:
IOError: If the light curve files for this Kepler ID cannot be found.
"""
# Read and process the light curve.
time
,
flux
=
preprocess
.
read_and_process_light_curve
(
tce
.
kepid
,
all_time
,
all_flux
=
preprocess
.
read_light_curve
(
tce
.
kepid
,
FLAGS
.
kepler_data_dir
)
time
,
flux
=
preprocess
.
phase_fold_and_sort_light_curve
(
time
,
flux
,
tce
.
tce_period
,
tce
.
tce_time0bk
)
# Generate the local and global views.
global_view
=
preprocess
.
global_view
(
time
,
flux
,
tce
.
tce_period
)
local_view
=
preprocess
.
local_view
(
time
,
flux
,
tce
.
tce_period
,
tce
.
tce_duration
)
# Make output proto.
ex
=
tf
.
train
.
Example
()
# Set time series features.
_set_float_feature
(
ex
,
"global_view"
,
global_view
)
_set_float_feature
(
ex
,
"local_view"
,
local_view
)
# Set other columns.
for
col_name
,
value
in
tce
.
items
():
if
np
.
issubdtype
(
type
(
value
),
np
.
integer
):
_set_int64_feature
(
ex
,
col_name
,
[
value
])
else
:
try
:
_set_float_feature
(
ex
,
col_name
,
[
float
(
value
)])
except
ValueError
:
_set_bytes_feature
(
ex
,
col_name
,
[
value
])
return
ex
time
,
flux
=
preprocess
.
process_light_curve
(
all_time
,
all_flux
)
return
preprocess
.
generate_example_for_tce
(
time
,
flux
,
tce
)
def
_process_file_shard
(
tce_table
,
file_name
):
...
...
research/astronet/astronet/data/preprocess.py
View file @
ddee8c74
...
...
@@ -21,29 +21,28 @@ from __future__ import print_function
import
numpy
as
np
import
tensorflow
as
tf
from
astronet.util
import
example_util
from
light_curve_util
import
kepler_io
from
light_curve_util
import
median_filter
from
light_curve_util
import
util
from
third_party.kepler_spline
import
kepler_spline
def
read_
and_process_
light_curve
(
kepid
,
kepler_data_dir
,
max_gap_width
=
0.75
):
"""Reads a light curv
e, fits a B-spline and divides the curve by the splin
e.
def
read_light_curve
(
kepid
,
kepler_data_dir
):
"""Reads a
Kepler
light curve.
Args:
kepid: Kepler id of the target star.
kepler_data_dir: Base directory containing Kepler data. See
kepler_io.kepler_filenames().
max_gap_width: Gap size (in days) above which the light curve is split for
the fitting of B-splines.
Returns:
time: 1D NumPy array; the time values of the light curve.
flux: 1D NumPy array; the normalized flux values of the light curve.
all_time: A list of numpy arrays; the time values of the raw light curve.
all_flux: A list of numpy arrays corresponding to the time arrays in
all_time.
Raises:
IOError: If the light curve files for this Kepler ID cannot be found.
ValueError: If the spline could not be fit.
"""
# Read the Kepler light curve.
file_names
=
kepler_io
.
kepler_filenames
(
kepler_data_dir
,
kepid
)
...
...
@@ -51,21 +50,26 @@ def read_and_process_light_curve(kepid, kepler_data_dir, max_gap_width=0.75):
raise
IOError
(
"Failed to find .fits files in %s for Kepler ID %s"
%
(
kepler_data_dir
,
kepid
))
all_time
,
all_flux
=
kepler_io
.
read_kepler_light_curve
(
file_names
)
return
kepler_io
.
read_kepler_light_curve
(
file_names
)
# Split on gaps.
all_time
,
all_flux
=
util
.
split
(
all_time
,
all_flux
,
gap_width
=
max_gap_width
)
# Logarithmically sample candidate break point spacings between 0.5 and 20
# days.
bkspaces
=
np
.
logspace
(
np
.
log10
(
0.5
),
np
.
log10
(
20
),
num
=
20
)
def
process_light_curve
(
all_time
,
all_flux
):
"""Removes low-frequency variability from a light curve.
# Generate spline.
spline
=
kepler_spline
.
choose_kepler_spline
(
all_time
,
all_flux
,
bkspaces
,
penalty_coeff
=
1.0
,
verbose
=
False
)[
0
]
Args:
all_time: A list of numpy arrays; the time values of the raw light curve.
all_flux: A list of numpy arrays corresponding to the time arrays in
all_time.
Returns:
time: 1D NumPy array; the time values of the light curve.
flux: 1D NumPy array; the normalized flux values of the light curve.
"""
# Split on gaps.
all_time
,
all_flux
=
util
.
split
(
all_time
,
all_flux
,
gap_width
=
0.75
)
if
spline
is
None
:
raise
ValueError
(
"Failed to fit spline with Kepler ID %s"
,
kepid
)
# Fit a piecewise-cubic spline with default arguments.
spline
=
kepler_spline
.
fit_kepler_spline
(
all_time
,
all_flux
,
verbose
=
False
)[
0
]
# Concatenate the piecewise light curve and spline.
time
=
np
.
concatenate
(
all_time
)
...
...
@@ -77,7 +81,6 @@ def read_and_process_light_curve(kepid, kepler_data_dir, max_gap_width=0.75):
# there. Instead we just remove them.
finite_i
=
np
.
isfinite
(
spline
)
if
not
np
.
all
(
finite_i
):
tf
.
logging
.
warn
(
"Incomplete spline with Kepler ID %s"
,
kepid
)
time
=
time
[
finite_i
]
flux
=
flux
[
finite_i
]
spline
=
spline
[
finite_i
]
...
...
@@ -202,3 +205,38 @@ def local_view(time,
bin_width
=
duration
*
bin_width_factor
,
t_min
=
max
(
-
period
/
2
,
-
duration
*
num_durations
),
t_max
=
min
(
period
/
2
,
duration
*
num_durations
))
def generate_example_for_tce(time, flux, tce):
  """Builds the tf.train.Example for a single TCE from its light curve.

  Args:
    time: 1D NumPy array; the time values of the light curve.
    flux: 1D NumPy array; the normalized flux values of the light curve.
    tce: Dict-like object containing at least 'tce_period', 'tce_duration', and
      'tce_time0bk'. Every item in it is also written to the output as a
      feature.

  Returns:
    A tf.train.Example containing the features 'global_view' and 'local_view',
    plus one feature for each item present in `tce`.
  """
  tce_period = tce["tce_period"]
  tce_duration = tce["tce_duration"]
  tce_t0 = tce["tce_time0bk"]

  # Both views below are generated from the phase-folded, sorted light curve.
  folded_time, folded_flux = phase_fold_and_sort_light_curve(
      time, flux, tce_period, tce_t0)

  example = tf.train.Example()

  # Time series features.
  global_values = global_view(folded_time, folded_flux, tce_period)
  example_util.set_float_feature(example, "global_view", global_values)
  local_values = local_view(folded_time, folded_flux, tce_period, tce_duration)
  example_util.set_float_feature(example, "local_view", local_values)

  # Remaining features: one per entry of `tce`, each wrapped in a list.
  for feature_name, feature_value in tce.items():
    example_util.set_feature(example, feature_name, [feature_value])

  return example
research/astronet/third_party/kepler_spline/kepler_spline.py
View file @
ddee8c74
...
...
@@ -159,8 +159,7 @@ def choose_kepler_spline(all_time,
Args:
all_time: List of 1D numpy arrays; the time values of the light curve.
all_flux: List of 1D numpy arrays; the flux (brightness) values of the light
curve.
all_flux: List of 1D numpy arrays; the flux values of the light curve.
bkspaces: List of break-point spacings to try.
maxiter: Maximum number of attempts to fit each spline after removing badly
fit points.
...
...
@@ -276,3 +275,48 @@ def choose_kepler_spline(all_time,
]
return
best_spline
,
metadata
def fit_kepler_spline(all_time,
                      all_flux,
                      bkspace_min=0.5,
                      bkspace_max=20,
                      bkspace_num=20,
                      maxiter=5,
                      penalty_coeff=1.0,
                      verbose=True):
  """Fits a Kepler spline, trying logarithmically-spaced breakpoint spacings.

  This is a convenience wrapper: it builds the list of candidate breakpoint
  spacings and delegates the fit to choose_kepler_spline().

  Args:
    all_time: List of 1D numpy arrays; the time values of the light curve.
    all_flux: List of 1D numpy arrays; the flux values of the light curve.
    bkspace_min: Minimum breakpoint spacing to try.
    bkspace_max: Maximum breakpoint spacing to try.
    bkspace_num: Number of breakpoint spacings to try.
    maxiter: Maximum number of attempts to fit each spline after removing badly
      fit points.
    penalty_coeff: Coefficient of the penalty term for using more parameters in
      the Bayesian Information Criterion. Decreasing this value will allow
      more parameters to be used (i.e. smaller break-point spacing), and
      vice-versa.
    verbose: Whether to log individual spline errors. Note that if many
      breakpoint spacings are tried (particularly small ones) then this may
      cause logging pollution if calling this function for many light curves.

  Returns:
    spline: List of numpy arrays; values of the best-fit spline corresponding
      to the input flux arrays.
    metadata: Object containing metadata about the spline fit.
  """
  # Sample bkspace_num candidate spacings, evenly spaced in log10, between
  # bkspace_min and bkspace_max.
  log_min = np.log10(bkspace_min)
  log_max = np.log10(bkspace_max)
  candidate_bkspaces = np.logspace(log_min, log_max, num=bkspace_num)

  return choose_kepler_spline(
      all_time,
      all_flux,
      candidate_bkspaces,
      maxiter=maxiter,
      penalty_coeff=penalty_coeff,
      verbose=verbose)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment