# Copyright 2018 The TensorFlow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Functions for reading Kepler data.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os.path from astropy.io import fits import numpy as np from light_curve import util from tensorflow import gfile # Quarter index to filename prefix for long cadence Kepler data. # Reference: https://archive.stsci.edu/kepler/software/get_kepler.py LONG_CADENCE_QUARTER_PREFIXES = { 0: ["2009131105131"], 1: ["2009166043257"], 2: ["2009259160929"], 3: ["2009350155506"], 4: ["2010078095331", "2010009091648"], 5: ["2010174085026"], 6: ["2010265121752"], 7: ["2010355172524"], 8: ["2011073133259"], 9: ["2011177032512"], 10: ["2011271113734"], 11: ["2012004120508"], 12: ["2012088054726"], 13: ["2012179063303"], 14: ["2012277125453"], 15: ["2013011073258"], 16: ["2013098041711"], 17: ["2013131215648"] } # Quarter index to filename prefix for short cadence Kepler data. # Reference: https://archive.stsci.edu/kepler/software/get_kepler.py SHORT_CADENCE_QUARTER_PREFIXES = { 0: ["2009131110544"], 1: ["2009166044711"], 2: ["2009201121230", "2009231120729", "2009259162342"], 3: ["2009291181958", "2009322144938", "2009350160919"], 4: ["2010009094841", "2010019161129", "2010049094358", "2010078100744"], 5: ["2010111051353", "2010140023957", "2010174090439"], 6: ["2010203174610", "2010234115140", "2010265121752"], 7: ["2010296114515", "2010326094124", "2010355172524"], 8: ["2011024051157", "2011053090032", "2011073133259"], 9: ["2011116030358", "2011145075126", "2011177032512"], 10: ["2011208035123", "2011240104155", "2011271113734"], 11: ["2011303113607", "2011334093404", "2012004120508"], 12: ["2012032013838", "2012060035710", "2012088054726"], 13: ["2012121044856", "2012151031540", "2012179063303"], 14: ["2012211050319", "2012242122129", "2012277125453"], 15: ["2012310112549", "2012341132017", "2013011073258"], 16: ["2013017113907", "2013065031647", "2013098041711"], 17: ["2013121191144", "2013131215648"] } # Quarter order for different scrambling procedures. # Page 9: https://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/20170009549.pdf. SIMULATED_DATA_SCRAMBLE_ORDERS = { "SCR1": [0, 13, 14, 15, 16, 9, 10, 11, 12, 5, 6, 7, 8, 1, 2, 3, 4, 17], "SCR2": [0, 1, 2, 3, 4, 13, 14, 15, 16, 9, 10, 11, 12, 5, 6, 7, 8, 17], "SCR3": [0, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 17], } def kepler_filenames(base_dir, kep_id, long_cadence=True, quarters=None, injected_group=None, check_existence=True): """Returns the light curve filenames for a Kepler target star. This function assumes the directory structure of the Mikulski Archive for Space Telescopes (http://archive.stsci.edu/pub/kepler/lightcurves). Specifically, the filenames for a particular Kepler target star have the following format: ${kep_id:0:4}/${kep_id}/kplr${kep_id}-${quarter_prefix}_${type}.fits, where: kep_id is the Kepler id left-padded with zeros to length 9; quarter_prefix is the filename quarter prefix; type is one of "llc" (long cadence light curve) or "slc" (short cadence light curve). Args: base_dir: Base directory containing Kepler data. kep_id: Id of the Kepler target star. May be an int or a possibly zero- padded string. long_cadence: Whether to read a long cadence (~29.4 min / measurement) light curve as opposed to a short cadence (~1 min / measurement) light curve. quarters: Optional list of integers in [0, 17]; the quarters of the Kepler mission to return. injected_group: Optional string indicating injected light curves. One of "inj1", "inj2", "inj3". check_existence: If True, only return filenames corresponding to files that exist (not all stars have data for all quarters). Returns: A list of filenames. """ # Pad the Kepler id with zeros to length 9. kep_id = "{:09d}".format(int(kep_id)) quarter_prefixes, cadence_suffix = ((LONG_CADENCE_QUARTER_PREFIXES, "llc") if long_cadence else (SHORT_CADENCE_QUARTER_PREFIXES, "slc")) if quarters is None: quarters = quarter_prefixes.keys() quarters = sorted(quarters) # Sort quarters chronologically. filenames = [] base_dir = os.path.join(base_dir, kep_id[0:4], kep_id) for quarter in quarters: for quarter_prefix in quarter_prefixes[quarter]: if injected_group: base_name = "kplr{}-{}_INJECTED-{}_{}.fits".format( kep_id, quarter_prefix, injected_group, cadence_suffix) else: base_name = "kplr{}-{}_{}.fits".format(kep_id, quarter_prefix, cadence_suffix) filename = os.path.join(base_dir, base_name) # Not all stars have data for all quarters. if not check_existence or gfile.Exists(filename): filenames.append(filename) return filenames def scramble_light_curve(all_time, all_flux, all_quarters, scramble_type): """Scrambles a light curve according to a given scrambling procedure. Args: all_time: List holding arrays of time values, each containing a quarter of time data. all_flux: List holding arrays of flux values, each containing a quarter of flux data. all_quarters: List of integers specifying which quarters are present in the light curve (max is 18: Q0...Q17). scramble_type: String specifying the scramble order, one of {'SCR1', 'SCR2', 'SCR3'}. Returns: scr_flux: Scrambled flux values; the same list as the input flux in another order. scr_time: Time values, re-partitioned to match sizes of the scr_flux lists. """ order = SIMULATED_DATA_SCRAMBLE_ORDERS[scramble_type] scr_flux = [] for quarter in order: # Ignore missing quarters in the scramble order. if quarter in all_quarters: scr_flux.append(all_flux[all_quarters.index(quarter)]) scr_time = util.reshard_arrays(all_time, scr_flux) return scr_time, scr_flux def read_kepler_light_curve(filenames, light_curve_extension="LIGHTCURVE", scramble_type=None, interpolate_missing_time=False): """Reads time and flux measurements for a Kepler target star. Args: filenames: A list of .fits files containing time and flux measurements. light_curve_extension: Name of the HDU 1 extension containing light curves. scramble_type: What scrambling procedure to use: 'SCR1', 'SCR2', or 'SCR3' (pg 9: https://exoplanetarchive.ipac.caltech.edu/docs/KSCI-19114-002.pdf). interpolate_missing_time: Whether to interpolate missing (NaN) time values. This should only affect the output if scramble_type is specified (NaN time values typically come with NaN flux values, which are removed anyway, but scrambing decouples NaN time values from NaN flux values). Returns: all_time: A list of numpy arrays; the time values of the light curve. all_flux: A list of numpy arrays; the flux values of the light curve. """ all_time = [] all_flux = [] all_quarters = [] for filename in filenames: with fits.open(gfile.Open(filename, "rb")) as hdu_list: quarter = hdu_list["PRIMARY"].header["QUARTER"] light_curve = hdu_list[light_curve_extension].data time = light_curve.TIME flux = light_curve.PDCSAP_FLUX if not time.size: continue # No data. # Possibly interpolate missing time values. if interpolate_missing_time: time = util.interpolate_missing_time(time, light_curve.CADENCENO) all_time.append(time) all_flux.append(flux) all_quarters.append(quarter) if scramble_type: all_time, all_flux = scramble_light_curve(all_time, all_flux, all_quarters, scramble_type) # Remove timestamps with NaN time or flux values. for i, (time, flux) in enumerate(zip(all_time, all_flux)): flux_and_time_finite = np.logical_and(np.isfinite(flux), np.isfinite(time)) all_time[i] = time[flux_and_time_finite] all_flux[i] = flux[flux_and_time_finite] return all_time, all_flux