Commit e8742a3d authored by Alex Tamkin, committed by Christopher Shallue
Browse files

Modify processing pipeline to enable generation of scrambled lightcurves. Fix...

Modify processing pipeline to enable generation of scrambled lightcurves. Fix bugs to enable generation of inverted lightcurves.

PiperOrigin-RevId: 207595688
parent 025efbf3
...@@ -73,6 +73,14 @@ SHORT_CADENCE_QUARTER_PREFIXES = { ...@@ -73,6 +73,14 @@ SHORT_CADENCE_QUARTER_PREFIXES = {
17: ["2013121191144", "2013131215648"] 17: ["2013121191144", "2013131215648"]
} }
# Quarter order for each supported scrambling procedure, keyed by procedure
# name. Each value lists quarter numbers in the order in which their flux is
# emitted by the scrambling code.
# Page 9: https://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/20170009549.pdf.
#
# In every order, quarter 17 comes last and quarter 0 does not appear at all.
SIMULATED_DATA_SCRAMBLE_ORDERS = {
# SCR1: the four blocks of four consecutive quarters (1-4, 5-8, 9-12, 13-16)
# in reverse block order; quarters inside a block keep their order.
"SCR1": [13, 14, 15, 16, 9, 10, 11, 12, 5, 6, 7, 8, 1, 2, 3, 4, 17],
# SCR2: block 1-4 stays first; the remaining three blocks are reversed.
"SCR2": [1, 2, 3, 4, 13, 14, 15, 16, 9, 10, 11, 12, 5, 6, 7, 8, 17],
# SCR3: quarters 1 through 16 in fully reversed order.
"SCR3": [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 17],
}
def kepler_filenames(base_dir, def kepler_filenames(base_dir,
kep_id, kep_id,
...@@ -142,12 +150,51 @@ def kepler_filenames(base_dir, ...@@ -142,12 +150,51 @@ def kepler_filenames(base_dir,
return filenames return filenames
def scramble_light_curve(all_flux, all_time, all_quarters, scramble_type):
  """Scrambles a light curve according to a given scrambling procedure.

  Flux segments are reordered by quarter, following
  SIMULATED_DATA_SCRAMBLE_ORDERS[scramble_type]. Time values are NOT
  reordered: they are concatenated in their original order and then cut into
  consecutive chunks whose lengths match the reordered flux segments.

  Args:
    all_flux: List of flux sequences, one per segment of the light curve.
    all_time: List of time sequences, parallel to all_flux.
    all_quarters: List of integers parallel to all_flux; all_quarters[i] is the
      quarter that all_flux[i] belongs to (Kepler quarters are numbered Q0
      through Q17). A quarter may appear more than once; all of its segments
      are kept together, in their input order.
    scramble_type: String naming the scramble order, one of {'SCR1', 'SCR2',
      'SCR3'}.

  Returns:
    scr_flux: The input flux segments (the same objects, not copies) in
      scrambled order. Segments whose quarter is not part of the scramble
      order are left out.
    scr_time: List of plain Python lists of time values, one per entry of
      scr_flux and of the same length as that entry.

  Raises:
    KeyError: If scramble_type is not a key of SIMULATED_DATA_SCRAMBLE_ORDERS.
  """
  order = SIMULATED_DATA_SCRAMBLE_ORDERS[scramble_type]

  # Group segment positions by quarter. Looking a quarter up with
  # list.index() would only ever find its first segment and silently drop any
  # further segments that share the same quarter.
  positions_by_quarter = {}
  for position, quarter in enumerate(all_quarters):
    positions_by_quarter.setdefault(quarter, []).append(position)

  scr_flux = []
  for quarter in order:
    # Quarters of the scramble order that are missing from the light curve
    # contribute nothing.
    for position in positions_by_quarter.get(quarter, ()):
      scr_flux.append(all_flux[position])

  # Nothing selected (e.g. an empty light curve): return early instead of
  # letting np.concatenate fail on an empty list.
  if not scr_flux:
    return [], []

  # Reapportion the time axis to match the sizes of the reordered flux
  # segments. Time left over after the last segment is discarded.
  # NOTE(review): the chunks are plain lists, not numpy arrays; a caller that
  # indexes them with an index array (such as the result of np.where) has to
  # convert them first -- confirm against the callers of this function.
  concat_time = np.concatenate(all_time)
  scr_time = []
  start = 0
  for flux in scr_flux:
    stop = start + len(flux)
    scr_time.append(list(concat_time[start:stop]))
    start = stop

  return scr_flux, scr_time
def read_kepler_light_curve(filenames,
light_curve_extension="LIGHTCURVE",
scramble_type=None):
"""Reads time and flux measurements for a Kepler target star. """Reads time and flux measurements for a Kepler target star.
Args: Args:
filenames: A list of .fits files containing time and flux measurements. filenames: A list of .fits files containing time and flux measurements.
light_curve_extension: Name of the HDU 1 extension containing light curves. light_curve_extension: Name of the HDU 1 extension containing light curves.
scramble_type: What scrambling procedure to use: 'SCR1', 'SCR2', or 'SCR3'
(pg 9: https://exoplanetarchive.ipac.caltech.edu/docs/KSCI-19114-002.pdf).
Returns: Returns:
all_time: A list of numpy arrays; the time values of the light curve. all_time: A list of numpy arrays; the time values of the light curve.
...@@ -156,6 +203,7 @@ def read_kepler_light_curve(filenames, light_curve_extension="LIGHTCURVE"): ...@@ -156,6 +203,7 @@ def read_kepler_light_curve(filenames, light_curve_extension="LIGHTCURVE"):
""" """
all_time = [] all_time = []
all_flux = [] all_flux = []
all_quarters = []
for filename in filenames: for filename in filenames:
with fits.open(gfile.Open(filename, "rb")) as hdu_list: with fits.open(gfile.Open(filename, "rb")) as hdu_list:
...@@ -163,13 +211,21 @@ def read_kepler_light_curve(filenames, light_curve_extension="LIGHTCURVE"): ...@@ -163,13 +211,21 @@ def read_kepler_light_curve(filenames, light_curve_extension="LIGHTCURVE"):
time = light_curve.TIME time = light_curve.TIME
flux = light_curve.PDCSAP_FLUX flux = light_curve.PDCSAP_FLUX
# Remove NaN flux values. # Index into primary HDU header and get quarter.
valid_indices = np.where(np.isfinite(flux)) all_quarters.append(hdu_list[0].header["QUARTER"])
time = time[valid_indices]
flux = flux[valid_indices]
if time.size: if time.size:
all_time.append(time) all_time.append(time)
all_flux.append(flux) all_flux.append(flux)
if scramble_type:
all_flux, all_time = scramble_light_curve(all_flux, all_time, all_quarters,
scramble_type)
# Remove NaN flux values after potential scrambling.
for i, (flux, time) in enumerate(zip(all_flux, all_time)):
valid_indices = np.where(np.isfinite(flux))
all_time[i] = time[valid_indices]
all_flux[i] = flux[valid_indices]
return all_time, all_flux return all_time, all_flux
...@@ -34,6 +34,23 @@ class KeplerIoTest(absltest.TestCase): ...@@ -34,6 +34,23 @@ class KeplerIoTest(absltest.TestCase):
def setUp(self): def setUp(self):
self.data_dir = os.path.join(FLAGS.test_srcdir, _DATA_DIR) self.data_dir = os.path.join(FLAGS.test_srcdir, _DATA_DIR)
def testScrambleLightCurve(self):
  """Checks SCR1 scrambling of a light curve with only four quarters."""
  # A single nan object is shared by the inputs and the expected values: list
  # equality checks identity before ==, so the NaN entries compare as equal.
  nan = float("nan")
  quarters = [3, 4, 7, 14]
  flux_segments = [[11, 12], [21], [nan, nan, 33], [41, 42]]
  time_segments = [[101, 102], [201], [301, 302, 303], [401, 402]]

  # Restricted to the quarters present, the SCR1 order is [14, 7, 3, 4].
  actual_flux, actual_time = kepler_io.scramble_light_curve(
      flux_segments, time_segments, quarters, "SCR1")

  # Flux segments move as whole units and keep their NaNs (NaN removal is not
  # this function's job).
  expected_flux = [[41, 42], [nan, nan, 33], [11, 12], [21]]
  # Time stays in its original order and is only re-cut to the new flux
  # segment lengths (2, 3, 2, 1).
  expected_time = [[101, 102], [201, 301, 302], [303, 401], [402]]

  self.assertEqual(expected_flux, actual_flux)
  self.assertEqual(expected_time, actual_time)
def testKeplerFilenames(self): def testKeplerFilenames(self):
# All quarters. # All quarters.
filenames = kepler_io.kepler_filenames( filenames = kepler_io.kepler_filenames(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment