From e99d7193c6f345e0f6d68dd7b5e3b818ef1e3f69 Mon Sep 17 00:00:00 2001
From: Jorge
Date: Tue, 15 Mar 2022 14:48:15 -0700
Subject: [PATCH 1/5] split time model into segments

---
 src/psfmachine/machine.py | 226 ++++++++++++++++++++++++++++----------
 1 file changed, 166 insertions(+), 60 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index f02dfde..2538837 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -645,9 +645,11 @@ def _time_bin(self, npoints=200, downsample=False):
         """
         # Where there are break points in the time array
+        dts = np.diff(self.time)
         splits = np.append(
-            np.append(0, np.where(np.diff(self.time) > 0.1)[0] + 1), len(self.time)
+            np.append(0, np.where(dts > 5 * np.median(dts))[0] + 1), len(self.time)
         )
+        del dts
         # if using poscorr, find and add discontinuity in poscorr data
         if hasattr(self, "pos_corr1") and self.time_corrector in [
             "pos_corr",
@@ -681,10 +683,23 @@ def _time_bin(self, npoints=200, downsample=False):
         # within them
         breaks = []
         for spdx in range(len(splits_a)):
-            breaks.append(splits_a[spdx] + np.arange(0, dsplits[spdx]) * npoints)
-        # we include the last cadence as 99% of the sequence length
-        breaks.append(int(self.nt * 0.99))
-        breaks = np.hstack(breaks)
+            if not downsample:
+                breaks.append(splits_a[spdx] + np.arange(0, dsplits[spdx]) * npoints)
+            else:
+                # if downsample, the knots are spaced by npoints until the end of the
+                # segment
+                breaks.append(
+                    np.linspace(
+                        splits_a[spdx],
+                        splits_b[spdx] - int(self.nt * 0.01),
+                        dsplits[spdx],
+                        dtype=int,
+                    )
+                )
+        breaks = np.unique(np.hstack(breaks))
+        # we include the last cadence as 99% of the sequence length if necessary
+        if breaks[-1] < self.nt - npoints:
+            breaks = np.append(breaks, int(self.nt * 0.99))

         if not downsample:
             # time averaged between breaks
@@ -722,13 +737,8 @@ def _time_bin(self, npoints=200, downsample=False):
                 ]
             )
         else:
-            dwns_idx = (
-                np.vstack(
-                    [np.median(t1) for t1 in np.array_split(np.arange(self.nt), breaks)]
-                )
-                .ravel()
-                .astype(int)
-            )
+            # use breaks as downsample points
+            dwns_idx = breaks
             tm = self.time[dwns_idx]
             ta = (self.time - tm.mean()) / (tm.max() - tm.mean())
             fm = np.asarray(
@@ -783,6 +793,12 @@ def _time_bin(self, npoints=200, downsample=False):
             )
             pc1_smooth = np.array(pc1_smooth)
             pc2_smooth = np.array(pc2_smooth)
+            pc1_smooth = (pc1_smooth - pc1_smooth.mean()) / (
+                pc1_smooth.max() - pc1_smooth.mean()
+            )
+            pc2_smooth = (pc2_smooth - pc2_smooth.mean()) / (
+                pc2_smooth.max() - pc2_smooth.mean()
+            )

             # do poscorr binning
             if not downsample:
@@ -810,7 +826,7 @@ def _time_bin(self, npoints=200, downsample=False):

         return ta, tm, fm_raw, fm, fem

-    def build_time_model(self, plot=False, downsample=False):
+    def build_time_model(self, plot=False, downsample=False, split_models=False):
         """
         Builds a time model that moves the PRF model to account for the scene
         movement due to velocity aberration. It has two methods to choose from using the
@@ -827,6 +843,9 @@ def build_time_model(self, plot=False, downsample=False):
         downsample: boolean
             If True the `time` and `pos_corr` arrays will be downsampled instead of
             binned.
+        split_models : boolean
+            Will split the light curve into segments to fit different time models with
+            a common pixel normalization.
""" if hasattr(self, "pos_corr1") and self.time_corrector in [ "pos_corr", @@ -855,6 +874,7 @@ def build_time_model(self, plot=False, downsample=False): ) = self._time_bin(npoints=self.n_time_points, downsample=downsample) self._whitened_time = time_original + self.downsample_time_knots = downsample # not necessary to take value from Quantity to do .multiply() dx, dy = ( self.uncontaminated_source_mask.multiply(self.dra), @@ -882,56 +902,108 @@ def build_time_model(self, plot=False, downsample=False): # Cartesian spline with time dependence A3 = _combine_A(A2, time=time_binned) - # No saturated pixels - k = ( - (flux_binned_raw < 1.4e5).any(axis=0)[None, :] - * np.ones(flux_binned_raw.shape, bool) - ).ravel() - # No faint pixels - k &= ( - (flux_binned_raw > 10).any(axis=0)[None, :] - * np.ones(flux_binned_raw.shape, bool) - ).ravel() - # No huge variability - k &= ( - (np.abs(flux_binned - 1) < 1).all(axis=0)[None, :] - * np.ones(flux_binned.shape, bool) - ).ravel() - # No nans - k &= np.isfinite(flux_binned.ravel()) & np.isfinite(flux_err_binned.ravel()) prior_sigma = np.ones(A3.shape[1]) * 10 prior_mu = np.zeros(A3.shape[1]) - for count in [0, 1, 2]: - sigma_w_inv = A3[k].T.dot( - (A3.multiply(1 / flux_err_binned.ravel()[:, None] ** 2)).tocsr()[k] + # fit the time model for each segment + # we find the splits in data + if split_models: + dts = np.diff(self.time) + splits = np.append( + np.append(0, np.where(dts > 5 * np.median(dts))[0] + 1), len(self.time) ) - sigma_w_inv += np.diag(1 / prior_sigma ** 2) - # Fit the flux - 1 - B = A3[k].T.dot( - ((flux_binned.ravel() - 1) / flux_err_binned.ravel() ** 2)[k] + else: + splits = np.array([0, len(self.time)]) + + seg_time_model_w, pix_mask_k = [], [] + # we iterate over segements + for bk in range(len(splits) - 1): + # find the right mask that select the binned times andd flux to fit the + # time model + seg_mask = (time_binned[:, 0] >= time_original[splits[bk]]) & ( + time_binned[:, 0] < time_original[splits[bk + 1] - 1] ) - B += prior_mu / (prior_sigma ** 2) - time_model_w = np.linalg.solve(sigma_w_inv, B) - res = flux_binned - A3.dot(time_model_w).reshape(flux_binned.shape) - res = np.ma.masked_array(res, (~k).reshape(flux_binned.shape)) - bad_targets = sigma_clip(res, sigma=5).mask - bad_targets = ( - np.ones(flux_binned.shape, bool) & bad_targets.any(axis=0) + print(seg_mask) + # need to rebuild the A3 DM to use the rigth times/poscorrs + A2 = sparse.vstack([A_c] * time_binned[seg_mask].shape[0], format="csr") + if hasattr(self, "pos_corr1") and self.time_corrector in [ + "pos_corr", + "centroid", + ]: + A3 = _combine_A( + A2, poscorr=[poscorr1_binned[seg_mask], poscorr2_binned[seg_mask]] + ) + else: + A3 = _combine_A(A2, time=time_binned[seg_mask]) + print(A3.shape) + + # No saturated pixels + k = ( + (flux_binned_raw < 1.4e5).any(axis=0)[None, :] + * np.ones(flux_binned_raw[seg_mask].shape, bool) + ).ravel() + # No faint pixels + k &= ( + (flux_binned_raw[seg_mask] > 10).any(axis=0)[None, :] + * np.ones(flux_binned_raw[seg_mask].shape, bool) + ).ravel() + # No huge variability + k &= ( + (np.abs(flux_binned[seg_mask] - 1) < 1).all(axis=0)[None, :] + * np.ones(flux_binned[seg_mask].shape, bool) ).ravel() - # k &= ~sigma_clip(flux_binned.ravel() - A3.dot(time_model_w)).mask - k &= ~bad_targets + # No nans + k &= np.isfinite(flux_binned[seg_mask].ravel()) & np.isfinite( + flux_err_binned[seg_mask].ravel() + ) - self.time_model_w = time_model_w - self._time_masked = k + # we solve the segment by using `seg_mask` on all `*_binned` 
+            for count in [0, 1, 2]:
+                sigma_w_inv = A3[k].T.dot(
+                    (
+                        A3.multiply(1 / flux_err_binned[seg_mask].ravel()[:, None] ** 2)
+                    ).tocsr()[k]
+                )
+                sigma_w_inv += np.diag(1 / prior_sigma ** 2)
+                # Fit the flux - 1
+                B = A3[k].T.dot(
+                    (
+                        (flux_binned[seg_mask].ravel() - 1)
+                        / flux_err_binned[seg_mask].ravel() ** 2
+                    )[k]
+                )
+                B += prior_mu / (prior_sigma ** 2)
+                time_model_w = np.linalg.solve(sigma_w_inv, B)
+                res = flux_binned[seg_mask] - A3.dot(time_model_w).reshape(
+                    flux_binned[seg_mask].shape
+                )
+                res = np.ma.masked_array(res, (~k).reshape(flux_binned[seg_mask].shape))
+                bad_targets = sigma_clip(res, sigma=5).mask
+                bad_targets = (
+                    np.ones(flux_binned[seg_mask].shape, bool) & bad_targets.any(axis=0)
+                ).ravel()
+                # k &= ~sigma_clip(flux_binned.ravel() - A3.dot(time_model_w)).mask
+                k &= ~bad_targets
+            # we save the weights and pixel mask of each segment for later use
+            seg_time_model_w.append(time_model_w)
+            pix_mask_k.append(k)

-        self.time_model_w = time_model_w
-        self._time_masked = k
+        self.seg_splits = splits
+        self.time_model_w = np.asarray(seg_time_model_w)
+        self._time_masked = np.asarray(pix_mask_k, dtype=object)

         if plot:
             return self.plot_time_model()
         return

-    def plot_time_model(self):
+    def plot_time_model(self, segment=0):
         """
         Diagnostic plot of time model.

+        Parameters
+        ----------
+        segment : int
+            Which light curve segment will be plotted, default is the first one.
+
         Returns
         -------
         fig : matplotlib.Figure
@@ -951,7 +1023,9 @@ def plot_time_model(self):
                 poscorr2_smooth,
                 poscorr1_binned,
                 poscorr2_binned,
-            ) = self._time_bin(npoints=self.n_time_points)
+            ) = self._time_bin(
+                npoints=self.n_time_points, downsample=self.downsample_time_knots
+            )
         else:
             (
                 time_original,
@@ -959,7 +1033,9 @@ def plot_time_model(self):
                 flux_binned_raw,
                 flux_binned,
                 flux_err_binned,
-            ) = self._time_bin(npoints=self.n_time_points)
+            ) = self._time_bin(
+                npoints=self.n_time_points, downsample=self.downsample_time_knots
+            )

         # not necessary to take value from Quantity to do .multiply()
         dx, dy = (
@@ -976,7 +1052,14 @@ def plot_time_model(self):
             radius=self.time_radius,
             spacing=self.cartesian_knot_spacing,
         )
-        A2 = sparse.vstack([A_c] * time_binned.shape[0], format="csr")
+        # if self.seg_splits.shape[0] == 2 and segment == 0:
+        #     seg_mask = np.ones(time_binned.shape[0], dtype=bool)
+        # else:
+        seg_mask = (time_binned[:, 0] >= time_original[self.seg_splits[segment]]) & (
+            time_binned[:, 0] < time_original[self.seg_splits[segment + 1] - 1]
+        )
+        # find the right mask that selects the binned times and flux
+        A2 = sparse.vstack([A_c] * time_binned[seg_mask].shape[0], format="csr")
         # Cartesian spline with time dependence
         # Cartesian spline with time dependence
         if hasattr(self, "pos_corr1") and self.time_corrector in [
             "pos_corr",
             "centroid",
         ]:
             # Cartesian spline with poscor dependence
-            A3 = _combine_A(A2, poscorr=[poscorr1_binned, poscorr2_binned])
+            A3 = _combine_A(
+                A2, poscorr=[poscorr1_binned[seg_mask], poscorr2_binned[seg_mask]]
+            )
         else:
             # Cartesian spline with time dependence
-            A3 = _combine_A(A2, time=time_binned)
+            A3 = _combine_A(A2, time=time_binned[seg_mask])

-        model = A3.dot(self.time_model_w).reshape(flux_binned.shape) + 1
+        model = (
+            A3.dot(self.time_model_w[segment]).reshape(flux_binned[seg_mask].shape) + 1
+        )
         fig, ax = plt.subplots(2, 2, figsize=(7, 6), facecolor="w")
-        k1 = self._time_masked.reshape(flux_binned.shape)[0]
-        k2 = self._time_masked.reshape(flux_binned.shape)[-1]
+        # this is currently breaking when doing just 1 segment. Works fine for multiple.
+        k1 = (
+            self._time_masked[segment]
+            .reshape(flux_binned[seg_mask].shape)[0]
+            .astype(bool)
+        )
+        k2 = (
+            self._time_masked[segment]
+            .reshape(flux_binned[seg_mask].shape)[-1]
+            .astype(bool)
+        )
         im = ax[0, 0].scatter(
             dx[k1],
             dy[k1],
-            c=flux_binned[0][k1],
+            c=flux_binned[seg_mask][0][k1],
             s=3,
             vmin=0.5,
             vmax=1.5,
@@ -1006,7 +1102,7 @@ def plot_time_model(self):
         ax[0, 1].scatter(
             dx[k2],
             dy[k2],
-            c=flux_binned[-1][k2],
+            c=flux_binned[seg_mask][-1][k2],
             s=3,
             vmin=0.5,
             vmax=1.5,
@@ -1531,7 +1627,17 @@ def fit_model(self, fit_va=False):
                     (self._whitened_time[tdx] ** np.arange(4))[:, None]
                     * np.ones(A_cp3.shape[1] // 4)
                 )
-                X.data *= A_cp3.multiply(t_mult).dot(self.time_model_w) + 1
+                # we make sure to use the `time_model_w` that corresponds to the segment
+                # we are computing
+                seg_num = np.where(
+                    [
+                        (tdx >= self.seg_splits[k]) & (tdx < self.seg_splits[k + 1])
+                        for k in range(len(self.seg_splits) - 1)
+                    ]
+                )[0]
+                X.data *= (
+                    A_cp3.multiply(t_mult).dot(self.time_model_w[seg_num].ravel()) + 1
+                )
             X = X.T

         sigma_w_inv = X.T.dot(
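Review note on PATCH 1: the per-segment fit is the same L2-regularized, inverse-variance-weighted least squares the code previously ran once over the whole light curve. A minimal dense sketch of that solve (the patch itself works on scipy.sparse matrices and applies the pixel mask `k`; the function and argument names below are illustrative only):

    import numpy as np

    def solve_segment(A, y, yerr, prior_mu, prior_sigma):
        # A: (n_cadences, n_weights) design matrix for one segment
        # y: binned flux minus 1, yerr: its uncertainty
        # normal equations with inverse-variance weights plus a Gaussian prior
        sigma_w_inv = A.T @ (A / yerr[:, None] ** 2) + np.diag(1 / prior_sigma ** 2)
        B = A.T @ (y / yerr ** 2) + prior_mu / prior_sigma ** 2
        return np.linalg.solve(sigma_w_inv, B)

Each segment contributes its own weight vector; `build_time_model` stacks them into `self.time_model_w` so that `fit_model` can pick the right one per cadence.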
""" if hasattr(self, "pos_corr1") and self.time_corrector in [ "pos_corr", @@ -906,14 +902,16 @@ def build_time_model(self, plot=False, downsample=False, split_models=False): prior_mu = np.zeros(A3.shape[1]) # fit the time model for each segment - # we find the splits in data - if split_models: - dts = np.diff(self.time) - splits = np.append( - np.append(0, np.where(dts > 5 * np.median(dts))[0] + 1), len(self.time) - ) + # use user input + if isinstance(split_time_model, (np.ndarray, list)): + # we make sure first and last index are included and sorted + splits = np.unique(np.append(split_time_model, [0, len(self.time)])) else: - splits = np.array([0, len(self.time)]) + # we find the splits in data + if split_time_model: + splits = get_breaks(self.time) + else: + splits = np.array([0, len(self.time)]) seg_time_model_w, pix_mask_k = [], [] # we iterate over segements @@ -923,7 +921,6 @@ def build_time_model(self, plot=False, downsample=False, split_models=False): seg_mask = (time_binned[:, 0] >= time_original[splits[bk]]) & ( time_binned[:, 0] < time_original[splits[bk + 1] - 1] ) - print(seg_mask) # need to rebuild the A3 DM to use the rigth times/poscorrs A2 = sparse.vstack([A_c] * time_binned[seg_mask].shape[0], format="csr") if hasattr(self, "pos_corr1") and self.time_corrector in [ @@ -935,16 +932,15 @@ def build_time_model(self, plot=False, downsample=False, split_models=False): ) else: A3 = _combine_A(A2, time=time_binned[seg_mask]) - print(A3.shape) # No saturated pixels k = ( - (flux_binned_raw < 1.4e5).any(axis=0)[None, :] + (flux_binned_raw < 1.4e5).all(axis=0)[None, :] * np.ones(flux_binned_raw[seg_mask].shape, bool) ).ravel() # No faint pixels k &= ( - (flux_binned_raw[seg_mask] > 10).any(axis=0)[None, :] + (flux_binned_raw[seg_mask] > 100).all(axis=0)[None, :] * np.ones(flux_binned_raw[seg_mask].shape, bool) ).ravel() # No huge variability @@ -1060,8 +1056,7 @@ def plot_time_model(self, segment=0): ) # find the right mask that select the binned times andd flux A2 = sparse.vstack([A_c] * time_binned[seg_mask].shape[0], format="csr") - # Cartesian spline with time dependence - # Cartesian spline with time dependence + if hasattr(self, "pos_corr1") and self.time_corrector in [ "pos_corr", "centroid", @@ -1078,7 +1073,6 @@ def plot_time_model(self, segment=0): A3.dot(self.time_model_w[segment]).reshape(flux_binned[seg_mask].shape) + 1 ) fig, ax = plt.subplots(2, 2, figsize=(7, 6), facecolor="w") - # this is currently breaking when doing just 1 segment. Works fine for multiple. k1 = ( self._time_masked[segment] .reshape(flux_binned[seg_mask].shape)[0] diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index df5356f..9356680 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -542,3 +542,21 @@ def threshold_bin(x, y, z, z_err=None, abs_thresh=10, bins=15, statistic=np.nanm np.hstack(new_z), np.hstack(new_z_err), ) + + +def get_breaks(time): + """ + Finds discontinuity in the time array and return the break indexes. 
+
+    Parameters
+    ----------
+    time : numpy.ndarray
+        Array with time values
+
+    Returns
+    -------
+    splits : numpy.ndarray
+        An array of indices with the break positions
+    """
+    dts = np.diff(time)
+    return np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)])
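Quick sanity check of the new `get_breaks` rule, on a made-up time array with ~0.02-day cadences and a single 2-day gap (values are illustrative only):

    import numpy as np

    time = np.concatenate([np.arange(0, 2, 0.02), np.arange(4, 6, 0.02)])
    dts = np.diff(time)
    # same rule as get_breaks: a break wherever dt > 5 * median(dt)
    splits = np.hstack([0, np.where(dts > 5 * np.median(dts))[0] + 1, len(time)])
    # splits -> array([  0, 100, 200]); (splits[i], splits[i+1]) bounds segment i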
From fa5cfac6bc1d20e929ef35437c2d8d98697dd04c Mon Sep 17 00:00:00 2001
From: Jorge
Date: Wed, 16 Mar 2022 15:18:01 -0700
Subject: [PATCH 3/5] multi-epoch TPF parser

---
 src/psfmachine/tpf.py | 365 +++++++++++++++++++++++++++++++++---------
 1 file changed, 285 insertions(+), 80 deletions(-)

diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py
index 9da96bd..a12e46f 100644
--- a/src/psfmachine/tpf.py
+++ b/src/psfmachine/tpf.py
@@ -12,6 +12,7 @@
 import urllib.request
 import tarfile
 from kbackground import Estimator
+from tqdm import tqdm

 from .utils import get_gaia_sources
 from .aperture import estimate_source_centroids_aperture, aperture_mask_to_2d
@@ -804,6 +805,7 @@ def from_TPFs(
         time_mask=None,
         apply_focus_mask=True,
         renormalize_tpf_bkg=False,
+        skygroup_multi_quarters=False,
         query_ra=None,
         query_dec=None,
         query_rad=None,
@@ -888,21 +890,44 @@ def from_TPFs(
             raise ValueError("Please only pass `lk.KeplerTargetPixelFiles`")
         if len(np.unique(tpf_meta["channel"])) != 1:
             raise ValueError("TPFs span multiple channels.")
+        if not skygroup_multi_quarters and len(set([x.quarter for x in tpfs])) != 1:
+            raise ValueError("TPFs span multiple quarters")

         # parse tpfs
-        (
-            times,
-            flux,
-            flux_err,
-            pos_corr1,
-            pos_corr2,
-            column,
-            row,
-            unw,
-            focus_mask,
-            qual_mask,
-            saturated_mask,
-        ) = _parse_TPFs(tpfs, renormalize_tpf_bkg=renormalize_tpf_bkg, **kwargs)
+        if not skygroup_multi_quarters:
+            (
+                times,
+                flux,
+                flux_err,
+                pos_corr1,
+                pos_corr2,
+                column,
+                row,
+                unw,
+                focus_mask,
+                qual_mask,
+                saturated_mask,
+            ) = _parse_TPFs(tpfs, renormalize_tpf_bkg=renormalize_tpf_bkg, **kwargs)
+        else:
+            (
+                tpfs_tid_qs,
+                times,
+                flux,
+                flux_err,
+                pos_corr1,
+                pos_corr2,
+                column,
+                row,
+                ra,
+                dec,
+                unw,
+                focus_mask,
+                qual_mask,
+                saturated_mask,
+            ) = _parse_multi_quarter_TPFs(
+                tpfs, renormalize_tpf_bkg=renormalize_tpf_bkg, **kwargs
+            )
+            tpfs = [item for sublist in tpfs_tid_qs for item in sublist]

         if time_mask is not None:
             time_mask = np.copy(time_mask)[qual_mask]
@@ -910,78 +935,35 @@ def from_TPFs(
         focus_mask = focus_mask if apply_focus_mask else None

         # convert to RA Dec
-        locs, ra, dec = _wcs_from_tpfs(tpfs)
+        if not skygroup_multi_quarters:
+            locs, ra, dec = _wcs_from_tpfs(tpfs)
+        else:
+            locs = np.asarray([column, row])

         # preprocess arrays
-        (
+        (flux, flux_err, unw, locs, ra, dec, column, row,) = _preprocess(
             flux,
             flux_err,
-            # pos_corr1,
-            # pos_corr2,
             unw,
             locs,
             ra,
             dec,
             column,
             row,
-        ) = _preprocess(
-            flux,
-            flux_err,
-            # pos_corr1,
-            # pos_corr2,
-            unw,
-            locs,
-            ra,
-            dec,
-            column,
-            row,
-            tpfs,
             saturated_mask,
         )

         sources = _get_coord_and_query_gaia(
-            tpfs, magnitude_limit, dr=dr, ra=query_ra, dec=query_dec, rad=query_rad
-        )
-
-        def get_tpf2source():
-            tpf2source = []
-            for tpf in tpfs:
-                tpfra, tpfdec = tpf.get_coordinates(cadence=0)
-                dra = np.abs(tpfra.ravel() - np.asarray(sources.ra)[:, None])
-                ddec = np.abs(tpfdec.ravel() - np.asarray(sources.dec)[:, None])
-                tpf2source.append(
-                    np.where(
-                        (
-                            (dra < 4 * 3 * u.arcsecond.to(u.deg))
-                            & (ddec < 4 * 3 * u.arcsecond.to(u.deg))
-                        ).any(axis=1)
-                    )[0]
-                )
-            return tpf2source
-
-        tpf2source = get_tpf2source()
-        sources = sources[
-            np.in1d(np.arange(len(sources)), np.hstack(tpf2source))
-        ].reset_index(drop=True)
-        # sources, _ = _clean_source_list(sources, ra, dec)
-        tpf_meta["sources"] = get_tpf2source()
-
-        idx, sep, _ = match_coordinates_sky(
-            SkyCoord(tpf_meta["ra"], tpf_meta["dec"], unit="deg"),
-            SkyCoord(np.asarray(sources[["ra", "dec"]]), unit="deg"),
-        )
-        match = (sep < 1 * u.arcsec) & (
-            np.abs(
-                np.asarray(sources["phot_g_mean_mag"][idx])
-                - np.asarray(
-                    [t if t is not None else np.nan for t in tpf_meta["tpfmag"]]
-                )
-            )
-            < 0.25
+            tpfs,
+            magnitude_limit,
+            dr=dr,
+            ra=query_ra,
+            dec=query_dec,
+            rad=query_rad,
+            epoch=Time(times[len(times) // 2], format="jd").jyear,
         )
-
-        sources["tpf_id"] = None
-        sources.loc[idx[match], "tpf_id"] = np.asarray(tpf_meta["targetid"])[match]
+        sources, tpf_meta = _get_tpf2source(tpfs, sources, tpf_meta)

         # return a Machine object
         return TPFMachine(
@@ -1122,18 +1104,202 @@ def _parse_TPFs(tpfs, renormalize_tpf_bkg=True, **kwargs):
     )


+def _parse_multi_quarter_TPFs(tpfs, renormalize_tpf_bkg=False, quiet=False, **kwargs):
+    """
+    Parse TPF collection to extract times, pixel fluxes, flux errors and tpf-index
+    per pixel
+
+    Parameters
+    ----------
+    tpfs: lightkurve TargetPixelFileCollection
+        Collection of Target Pixel files
+
+    Returns
+    -------
+    times: numpy.ndarray
+        Array with time values
+    flux: numpy.ndarray
+        Array with flux values per pixel
+    flux_err: numpy.ndarray
+        Array with flux errors per pixel
+    unw: numpy.ndarray
+        Array with TPF index for each pixel
+    """
+
+    # check how many quarters we have
+    quarters = np.asarray([x.quarter for x in tpfs])
+    nquarters = len(set(quarters))
+    if len(set(quarters)) > 1:
+        target_ids = np.asarray([x.targetid for x in tpfs])
+        tpfs_tid_qs = []
+        cadences = []
+        pos_corr1, pos_corr2 = [], []
+        # create a list of tuples with quarterly TPFs for every target ID
+        for k, tid in enumerate(sorted(set(target_ids))):
+            mask = target_ids == tid
+            if mask.sum() != len(set(quarters)):
+                continue
+            tpf_qs = tpfs[mask][np.argsort(quarters[mask])]
+            tpfs_tid_qs.append(tpf_qs)
+            cadences.append(np.hstack([x.cadenceno for x in tpf_qs]))
+            pos_corr1.append(np.hstack([x.pos_corr1 for x in tpf_qs]))
+            pos_corr2.append(np.hstack([x.pos_corr2 for x in tpf_qs]))
+        # extract time, cadence, poscorrs, and quality values
+        cadences = np.asarray(cadences)
+        pos_corr1 = np.asarray(pos_corr1)
+        pos_corr2 = np.asarray(pos_corr2)
+        time = np.hstack([x.time.jd for x in tpfs_tid_qs[0]])
+        qual = np.hstack([x.quality for x in tpfs_tid_qs[0]])
+    else:
+        raise ValueError("TPFs are not multi-quarter.")
+
+    if isinstance(tpfs[0], lk.KeplerTargetPixelFile):
+        qual = np.hstack([tpfs_tid_qs[0][q].quality for q in range(nquarters)])
+        qual_mask = lk.utils.KeplerQualityFlags.create_quality_mask(
+            qual, 1 | 2 | 4 | 8 | 32 | 16384 | 65536 | 1048576
+        )
+        qual_mask &= (np.abs(pos_corr1[0]) < 5) & (np.abs(pos_corr2[0]) < 5)
+
+        # Cut out 1.5 days after every data gap
+        dt = np.hstack([10, np.diff(time)])
+        focus_mask = ~np.in1d(
+            np.arange(len(time)),
+            np.hstack(
+                [
+                    np.arange(t, t + int(1.5 / np.median(dt)))
+                    for t in np.where(dt > (np.median(dt) * 5))[0]
+                ]
+            ),
+        )
+        focus_mask = focus_mask[qual_mask]
+
+    elif isinstance(tpfs[0], lk.TessTargetPixelFile):
+        raise NotImplementedError("Multi-quarter not implemented for TESS data")
+
+    # mask by quality
+    cadences = cadences[:, qual_mask]
+    times = time[qual_mask]
+    pos_corr1 = pos_corr1[:, qual_mask]
+    pos_corr2 = pos_corr2[:, qual_mask]
+
+    # check that all TPFs have the same cadences
+    if not np.all(cadences[1:, :] - cadences[-1:, :] == 0):
+        raise ValueError("All TPFs must have same time basis")
+
+    # extract column, row, flux and flux_err from TPFs, making sure we align the pixels
+    # of same targets in different quarters.
+    # the masking I do with np.isin can be replaced by a Pandas merge using the
+    # col/row index as the table index to merge.
+    ra, dec, column, row, fluxq, flux_errq, unw = [], [], [], [], [], [], []
+    if renormalize_tpf_bkg:
+        flux_bkgq = []
+    # we iterate over tuples of target TPFs
+    for j, aux in tqdm(
+        enumerate(tpfs_tid_qs),
+        desc="Parsing multi quarter TPFs",
+        disable=quiet,
+        total=len(tpfs_tid_qs),
+    ):
+        # find the smallest TPF of the same target in multiple quarters to then mask
+        # out bigger TPFs.
+        # This could be adapted in the future to preserve the extra pixels; for now
+        # the easiest solution is to undersize to the smallest TPF.
+        shapes = [np.product(x.shape[1:]) for x in aux]
+        smallest = np.argmin(shapes)
+        column_smallest, row_smallest = np.meshgrid(
+            np.arange(aux[smallest].shape[2]) + aux[smallest].column,
+            np.arange(aux[smallest].shape[1]) + aux[smallest].row,
+        )
+        column.append(column_smallest.ravel())
+        row.append(row_smallest.ravel())
+        # convert to ra/dec assuming pointing is consistent across quarters; we use
+        # the WCS from one quarter
+        ra_, dec_ = (
+            aux[smallest]
+            .wcs.wcs_pix2world(
+                np.vstack(
+                    [
+                        (column_smallest.ravel() - aux[smallest].column),
+                        (row_smallest.ravel() - aux[smallest].row),
+                    ]
+                ).T,
+                0.0,
+            )
+            .T
+        )
+        ra.append(ra_)
+        dec.append(dec_)
+
+        tpf_flux, tpf_flux_err = [], []
+        if renormalize_tpf_bkg:
+            flux_bkg = []
+        # we iterate over quarters for the same target to parse data
+        for i, a in enumerate(aux):
+            column_, row_ = np.meshgrid(
+                np.arange(a.shape[2]) + a.column,
+                np.arange(a.shape[1]) + a.row,
+            )
+            # create pixel mask to know which pixels to drop with respect to the
+            # smallest TPF.
+            pix_mask = np.isin(column_, column_smallest) & np.isin(row_, row_smallest)
+            tpf_flux.append(a.flux.value.reshape(a.shape[0], -1)[:, pix_mask.ravel()])
+            tpf_flux_err.append(
+                a.flux_err.value.reshape(a.shape[0], -1)[:, pix_mask.ravel()]
+            )
+            if renormalize_tpf_bkg:
+                flux_bkg.append(
+                    a.flux_bkg.value.reshape(a.shape[0], -1)[:, pix_mask.ravel()]
+                )
+        # save pixel to TPF mapping
+        unw.append([np.zeros_like(column_smallest, dtype=int).ravel() + j])

+        # concatenate quarters for the same target
+        fluxq.append(np.vstack(tpf_flux))
+        flux_errq.append(np.vstack(tpf_flux_err))
+        if renormalize_tpf_bkg:
+            flux_bkgq.append(np.vstack(flux_bkg))
+
+    # append all the TPFs
+    column = np.hstack(column)
+    row = np.hstack(row)
+    ra = np.hstack(ra)
+    dec = np.hstack(dec)
+    fluxq = np.hstack(fluxq)[qual_mask]
+    flux_errq = np.hstack(flux_errq)[qual_mask]
+    if renormalize_tpf_bkg:
+        flux_bkgq = np.hstack(flux_bkgq)[qual_mask]
+        fluxq += flux_bkgq
+
+    unw = np.hstack(unw).ravel()
+
+    # masking out saturated pixels
+    sat_mask = np.nanmax(fluxq, axis=0) > 1.4e5
+
+    return (
+        # flatten TPF list to keep machine API
+        tpfs_tid_qs,
+        times,
+        fluxq,
+        flux_errq,
+        pos_corr1,
+        pos_corr2,
+        column,
+        row,
+        ra,
+        dec,
+        unw,
+        focus_mask,
+        qual_mask,
+        sat_mask,
+    )
+
+
 def _preprocess(
     flux,
     flux_err,
-    # pos_corr1,
-    # pos_corr2,
     unw,
     locs,
     ra,
     dec,
     column,
     row,
-    tpfs,
     saturated,
 ):
     """
@@ -1185,8 +1351,6 @@ def _saturated_pixels_mask(flux, column, row, saturation_limit=1.2e5):
     dec = dec[mask]
     flux = flux[:, mask]
     flux_err = flux_err[:, mask]
-    # pos_corr1 = pos_corr1[:, mask]
-    # pos_corr2 = pos_corr2[:, mask]
     unw = unw[mask]

     return (flux, flux_err, unw, locs, ra, dec, column, row)
@@ -1232,11 +1396,10 @@ def _wcs_from_tpfs(tpfs):

 def _get_coord_and_query_gaia(
-    tpfs, magnitude_limit=18, dr=3, ra=None, dec=None, rad=None
+    tpfs, magnitude_limit=18, dr=3, ra=None, dec=None, rad=None, epoch=None
 ):
     """
     Calculate ra, dec coordinates and search radius to query Gaia catalog
-
     Parameters
     ----------
     tpfs:
     magnitude_limit:
     ra : float or list of floats
         Ras to do gaia query
     dec : float or list of floats
         Decs to do gaia query
     rad : float or list of floats
         Radius to do gaia query
+    epoch : float, optional
+        Epoch (Julian year) used to propagate the Gaia source positions
-
     Returns
     -------
     sources: pandas.DataFrame
         Catalog with query result
     """
-    if not isinstance(tpfs, lk.TargetPixelFileCollection):
-        raise ValueError("Please pass a `lk.TargetPixelFileCollection`")

     # find the max circle per TPF that contain all pixel data to query Gaia
     # CH: Sometimes sources are missing from this...worth checking on
@@ -1279,13 +1439,16 @@ def _get_coord_and_query_gaia(
     else:
         raise ValueError("Please set all or None of `ra`, `dec`, `rad`")

+    if epoch is None:
+        epoch = Time(tpfs[0].time[len(tpfs[0]) // 2], format="jd").jyear
+
     # query Gaia with epoch propagation
     sources = get_gaia_sources(
         tuple(ras),
         tuple(decs),
         tuple(rads),
         magnitude_limit=magnitude_limit,
-        epoch=Time(tpfs[0].time[len(tpfs[0]) // 2], format="jd").jyear,
+        epoch=epoch,
         dr=dr,
     )
@@ -1345,3 +1508,45 @@ def _clean_source_list(sources, ra, dec, pixel_tolerance=4):
     sources = sources[clean].reset_index(drop=True)

     return sources, removed_sources
+
+
+def _get_tpf2source(tpfs, sources, tpf_meta):
+    def get_tpf2source():
+        tpf2source = []
+        for tpf in tpfs:
+            tpfra, tpfdec = tpf.get_coordinates(cadence=0)
+            dra = np.abs(tpfra.ravel() - np.asarray(sources.ra)[:, None])
+            ddec = np.abs(tpfdec.ravel() - np.asarray(sources.dec)[:, None])
+            tpf2source.append(
+                np.where(
+                    (
+                        (dra < 4 * 3 * u.arcsecond.to(u.deg))
+                        & (ddec < 4 * 3 * u.arcsecond.to(u.deg))
+                    ).any(axis=1)
+                )[0]
+            )
+        return tpf2source
+
+    tpf2source = get_tpf2source()
+    sources = sources[
+        np.in1d(np.arange(len(sources)), np.hstack(tpf2source))
+    ].reset_index(drop=True)
+    # sources, _ = _clean_source_list(sources, ra, dec)
+    tpf_meta["sources"] = get_tpf2source()
+
+    idx, sep, _ = match_coordinates_sky(
+        SkyCoord(tpf_meta["ra"], tpf_meta["dec"], unit="deg"),
+        SkyCoord(np.asarray(sources[["ra", "dec"]]), unit="deg"),
+    )
+    match = (sep < 1 * u.arcsec) & (
+        np.abs(
+            np.asarray(sources["phot_g_mean_mag"][idx])
+            - np.asarray([t if t is not None else np.nan for t in tpf_meta["tpfmag"]])
+        )
+        < 0.25
+    )
+
+    sources["tpf_id"] = None
+    sources.loc[idx[match], "tpf_id"] = np.asarray(tpf_meta["targetid"])[match]
+
+    return sources, tpf_meta

From 2729bdbe1681cc71e335a35d6270c56e53f603ad Mon Sep 17 00:00:00 2001
From: Jorge
Date: Wed, 16 Mar 2022 15:47:29 -0700
Subject: [PATCH 4/5] hotfix

---
 src/psfmachine/tpf.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py
index a12e46f..ba206d3 100644
--- a/src/psfmachine/tpf.py
+++ b/src/psfmachine/tpf.py
@@ -950,6 +950,7 @@ def from_TPFs(
             dec,
             column,
             row,
+            tpfs,
             saturated_mask,
         )

@@ -1300,6 +1301,7 @@ def _preprocess(
     dec,
     column,
     row,
+    tpfs,
     saturated,
 ):
     """
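The pixel-alignment step in `_parse_multi_quarter_TPFs` (PATCH 3) reduces each target to the pixels shared with its smallest-footprint quarter. A toy sketch of that `np.isin` trick, with invented TPF sizes and corner coordinates:

    import numpy as np

    # 3x3 "smallest" TPF starting at column 10, row 20
    col_small, row_small = np.meshgrid(np.arange(3) + 10, np.arange(3) + 20)
    # 5x5 TPF of the same target in another quarter, starting at (9, 19)
    col_big, row_big = np.meshgrid(np.arange(5) + 9, np.arange(5) + 19)
    # keep only the big TPF's pixels that fall on the small TPF's grid
    pix_mask = np.isin(col_big, col_small) & np.isin(row_big, row_small)
    # flux_big.reshape(n_cadences, -1)[:, pix_mask.ravel()] then lines up with
    # the smallest TPF, pixel by pixel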
From 9d8fa252195dfc8011bbab2ddbd8e5fb1820fa25 Mon Sep 17 00:00:00 2001
From: Jorge
Date: Thu, 17 Mar 2022 11:23:48 -0700
Subject: [PATCH 5/5] removing old lines

---
 src/psfmachine/machine.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index b08eb30..d10a426 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -1048,9 +1048,6 @@ def plot_time_model(self, segment=0):
             radius=self.time_radius,
             spacing=self.cartesian_knot_spacing,
         )
-        # if self.seg_splits.shape[0] == 2 and segment == 0:
-        #     seg_mask = np.ones(time_binned.shape[0], dtype=bool)
-        # else:
         seg_mask = (time_binned[:, 0] >= time_original[self.seg_splits[segment]]) & (
             time_binned[:, 0] < time_original[self.seg_splits[segment + 1] - 1]
        )
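Taken together, the series enables a segmented time model on single- or multi-quarter data. A minimal usage sketch (the target, search parameters, and keyword values below are illustrative only, not prescriptive):

    import lightkurve as lk
    import psfmachine as pm

    tpfs = lk.search_targetpixelfile(
        "Kepler-16", quarter=5, radius=1000, limit=10
    ).download_all()
    machine = pm.TPFMachine.from_TPFs(tpfs)
    # pass skygroup_multi_quarters=True to from_TPFs when the TPFs cover the
    # same skygroup over several quarters (PATCH 3)
    machine.build_time_model(split_time_model=True, downsample=True)
    machine.plot_time_model(segment=0)  # one diagnostic figure per segment
    machine.fit_model(fit_va=True)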