diff --git a/src/psfmachine/ffi.py b/src/psfmachine/ffi.py index ba0183e..0240118 100644 --- a/src/psfmachine/ffi.py +++ b/src/psfmachine/ffi.py @@ -7,15 +7,18 @@ import matplotlib.colors as colors from astropy.io import fits -from astropy.stats import SigmaClip from astropy.time import Time from astropy.wcs import WCS import astropy.units as u from astropy.stats import sigma_clip -from photutils import Background2D, MedianBackground, BkgZoomInterpolator # from . import PACKAGEDIR -from .utils import do_tiled_query, _make_A_cartesian, solve_linear_model +from .utils import ( + do_tiled_query, + _make_A_cartesian, + solve_linear_model, + _load_ffi_image, +) from .tpf import _clean_source_list from .machine import Machine @@ -94,8 +97,9 @@ def __init__( self.row = row self.ra = ra self.dec = dec - # keep 2d image for easy plotting - self.flux_2d = flux + self.meta = meta + self.wcs = wcs + self.image_shape = flux.shape[1:] # reshape flux and flux_err as [ntimes, npix] self.flux = flux.reshape(flux.shape[0], -1) @@ -105,18 +109,16 @@ def __init__( # remove background and mask bright/saturated pixels # these steps need to be done before `machine` init, so sparse delta # and flux arrays have the same shape - if not meta["BACKAPP"]: - self._remove_background() - self._mask_pixels() - - # remove nan pixels - valid_pix = np.isfinite(self.flux).sum(axis=0).astype(bool) - self.flux = self.flux[:, valid_pix] - self.flux_err = self.flux_err[:, valid_pix] - self.ra = self.ra[valid_pix] - self.dec = self.dec[valid_pix] - self.row = self.row[valid_pix] - self.column = self.column[valid_pix] + thumb = np.min(self.flux, axis=0).reshape(self.image_shape) + gthumb = np.hypot(*np.gradient(thumb)) + mask = ( + ~sigma_clip( + np.ma.masked_array(gthumb, gthumb > 500), + sigma=3, + cenfunc=lambda x, axis: 0, + ).mask + ).ravel() + self._remove_background(mask=mask) # init `machine` object super().__init__( @@ -139,8 +141,11 @@ def __init__( # hardcoded to work for Kepler and TESS FFIs sparse_dist_lim=40 if meta["TELESCOP"] == "Kepler" else 210, ) - self.meta = meta - self.wcs = wcs + self._mask_pixels() + + @property + def flux_2d(self): + return self.flux.reshape((self.flux.shape[0], *self.image_shape)) def __repr__(self): return f"FFIMachine (N sources, N times, N pixels): {self.shape}" @@ -185,31 +190,13 @@ def from_file( A Machine class object built from the FFI. """ # load FITS files and parse arrays - ( - wcs, - time, - flux, - flux_err, - ra, - dec, - column, - row, - metadata, - ) = _load_file(fname, extension=extension) - # create cutouts if asked - if cutout_size is not None: - flux, flux_err, ra, dec, column, row = _do_image_cutout( - flux, - flux_err, - ra, - dec, - column, - row, - cutout_size=cutout_size, - cutout_origin=cutout_origin, - ) - # hardcoded: the grid size to do the Gaia tiled query. This is different for - # cutouts and full channel. TESS and Kepler also need different grid sizes. 
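
The refactored loader above produces the cutout while the file is being read (via `_load_ffi_image` in utils.py) rather than slicing a fully loaded frame, so only the requested pixels are ever held in memory. A minimal sketch of the intended call pattern, assuming a local Kepler calibrated FFI file (the file name below is a placeholder):

    from psfmachine import FFIMachine

    # Load a 250 x 250 pixel cutout of channel 40, with the cutout origin
    # at pixel (100, 100) in matrix indexing.
    ffi = FFIMachine.from_file(
        "kplr2009114174833_ffi-cal.fits",  # hypothetical local FFI file
        extension=40,
        cutout_size=250,
        cutout_origin=[100, 100],
    )
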
+        (wcs, time, flux, flux_err, ra, dec, column, row, metadata,) = _load_file(
+            fname,
+            extension=extension,
+            cutout_size=cutout_size,
+            cutout_origin=cutout_origin,
+        )
+
     if metadata["TELESCOP"] == "Kepler":
         ngrid = (2, 2) if flux.shape[1] <= 500 else (4, 4)
     else:
@@ -219,7 +206,7 @@ def from_file(
         ra,
         dec,
         wcs,
-        magnitude_limit=18 if metadata["TELESCOP"] == "Kepler" else 15,
+        magnitude_limit=18,
         epoch=time.jyear.mean(),
         ngrid=ngrid,
         dr=3,
@@ -455,7 +442,7 @@ def save_flux_values(self, output=None, format="fits"):
 
         return
 
-    def _remove_background(self, mask=None):
+    def _remove_background(self, mask=None, pixel_knot_spacing=10):
         """
-        Background removal. It models the background using a median estimator,
-        rejects flux values with sigma clipping. It modiffies the attributes `flux` and
+        Background removal. It fits a smooth cartesian spline model to the background
+        pixels and subtracts it. It modifies the attributes `flux` and
@@ -466,29 +453,37 @@
         mask : numpy.ndarray of booleans
             Mask to reject pixels containing sources. Default None.
+        pixel_knot_spacing : int
+            Spacing in pixels between knots of the background spline model.
         """
-        # model background for all cadences
-        self.background_model = np.array(
-            [
-                Background2D(
-                    flux_2d,
-                    mask=mask,
-                    box_size=(64, 50),
-                    filter_size=15,
-                    exclude_percentile=20,
-                    sigma_clip=SigmaClip(sigma=3.0, maxiters=5),
-                    bkg_estimator=MedianBackground(),
-                    interpolator=BkgZoomInterpolator(order=3),
-                ).background
-                for flux_2d in self.flux_2d
-            ]
-        )
-        # substract background
-        self.flux_2d -= self.background_model
-        # flatten flix image
-        self.flux = self.flux_2d.reshape(self.flux_2d.shape[0], -1)
+        if not self.meta["BACKAPP"]:
+            cutout_size = np.max(
+                [
+                    self.row.max() - self.row.min() + 1,
+                    self.column.max() - self.column.min() + 1,
+                ]
+            )
+            # one knot every `pixel_knot_spacing` pixels, with at least one knot
+            n_knots = np.max([cutout_size // pixel_knot_spacing, 1])
+            if mask is None:
+                k = np.ones(self.flux.shape[1], dtype=bool)
+            else:
+                k = mask
+            X = _make_A_cartesian(
+                self.row - self.row.min() - cutout_size / 2,
+                self.column - self.column.min() - cutout_size / 2,
+                spacing="linear",
+                radius=(cutout_size) // 2,
+                n_knots=n_knots,
+            )
+            ws = np.linalg.solve(
+                X[k].T.dot(X[k]).toarray()
+                + np.diag(1 / (np.ones(X.shape[1]) * 1000000)),
+                X[k].T.dot(self.flux[:, k].T),
+            )
+            bkg = X.dot(ws).T
+
+            self.flux -= bkg
+            self.meta["BACKAPP"] = True
         return
 
-    def _saturated_pixels_mask(self, saturation_limit=1.5e5, tolerance=3):
+    def _saturated_pixels_mask(self, saturation_limit=1.3e5, tolerance=3):
         """
         Finds and removes saturated pixels, including bleed columns.
@@ -568,6 +563,8 @@ def _mask_pixels(self, pixel_saturation_limit=1.2e5, magnitude_bright_limit=8):
         """
         Mask saturated pixels and halo/diffraction patterns from bright sources.
 
+        Note that this masks pixels directly in the `Machine` mask attributes
+        (e.g. `source_mask`, `uncontaminated_source_mask`), rather than dropping
+        them from the flux arrays.
+
+ Parameters ---------- pixel_saturation_limit: float @@ -583,16 +580,15 @@ def _mask_pixels(self, pixel_saturation_limit=1.2e5, magnitude_bright_limit=8): self.non_bright_source_mask = ~self._bright_sources_mask( magnitude_limit=magnitude_bright_limit ) - good_pixels = self.non_sat_pixel_mask & self.non_bright_source_mask - - self.column = self.column[good_pixels] - self.row = self.row[good_pixels] - self.ra = self.ra[good_pixels] - self.dec = self.dec[good_pixels] - self.flux = self.flux[:, good_pixels] - self.flux_err = self.flux_err[:, good_pixels] - - return + pixel_mask = self.non_sat_pixel_mask & self.non_bright_source_mask + self.rough_mask = self.rough_mask.multiply(pixel_mask).tocsr() + self.rough_mask.eliminate_zeros() + self.source_mask = self.source_mask.multiply(pixel_mask).tocsr() + self.source_mask.eliminate_zeros() + self.uncontaminated_source_mask = self.uncontaminated_source_mask.multiply( + pixel_mask + ).tocsr() + self.uncontaminated_source_mask.eliminate_zeros() def residuals(self, plot=False, zoom=False, metric="residuals"): """ @@ -826,7 +822,7 @@ def plot_pixel_masks(self, ax=None): return ax -def _load_file(fname, extension=1): +def _load_file(fname, extension=1, cutout_size=256, cutout_origin=[0, 0]): """ Helper function to load FFI files and parse data. It parses the FITS files to extract the image data and metadata. It checks that all files provided in fname @@ -838,6 +834,10 @@ def _load_file(fname, extension=1): Name of the FFI files extension : int Number of HDU extension to use, for Kepler FFIs this corresponds to the channel + cutout_size: int + Size of (square) portion of FFIs to cut out + cutout_origin: tuple + Coordinates of the origin of the cut out Returns ------- @@ -860,9 +860,10 @@ def _load_file(fname, extension=1): meta : dict Dictionary with metadata """ - if not isinstance(fname, list): + if not isinstance(fname, (list, np.ndarray)): fname = np.sort([fname]) imgs = [] + imgs_err = [] times = [] telescopes = [] dct_types = [] @@ -872,39 +873,56 @@ def _load_file(fname, extension=1): if not os.path.isfile(f): raise FileNotFoundError("FFI calibrated fits file does not exist: ", f) - hdul = fits.open(f) - header = hdul[0].header - telescopes.append(header["TELESCOP"]) + hdul = fits.open(f, lazy_load_hdus=None) + primary_header = hdul[0].header + hdr = hdul[extension].header + telescopes.append(primary_header["TELESCOP"]) # kepler if f.split("/")[-1].startswith("kplr"): - dct_types.append(header["DCT_TYPE"]) - quarters.append(header["QUARTER"]) - extensions.append(hdul[extension].header["CHANNEL"]) - hdr = hdul[extension].header + dct_types.append(primary_header["DCT_TYPE"]) + quarters.append(primary_header["QUARTER"]) + extensions.append(hdr["CHANNEL"]) times.append((hdr["MJDEND"] + hdr["MJDSTART"]) / 2) - imgs.append(hdul[extension].data) + # imgs.append(hdul[extension].data) # K2 elif f.split("/")[-1].startswith("ktwo"): - dct_types.append(header["DCT_TYPE"]) - quarters.append(header["CAMPAIGN"]) - extensions.append(hdul[extension].header["CHANNEL"]) - hdr = hdul[extension].header + dct_types.append(primary_header["DCT_TYPE"]) + quarters.append(primary_header["CAMPAIGN"]) + extensions.append(hdr["CHANNEL"]) times.append((hdr["MJDEND"] + hdr["MJDSTART"]) / 2) - imgs.append(hdul[extension].data) + # imgs.append(hdul[extension].data) # TESS elif f.split("/")[-1].startswith("tess"): - dct_types.append(header["CREATOR"].split(" ")[-1].upper()) + dct_types.append(primary_header["CREATOR"].split(" ")[-1].upper()) 
quarters.append(f.split("/")[-1].split("-")[1]) - hdr = hdul[1].header times.append((hdr["TSTART"] + hdr["TSTOP"]) / 2) - imgs.append(hdul[1].data) extensions.append("%i.%i" % (hdr["CAMERA"], hdr["CCD"])) - # raise NotImplementedError + # imgs.append(hdul[1].data) else: raise ValueError("FFI is not from Kepler or TESS.") - if i == 0: wcs = WCS(hdr) + col_2d, row_2d, f2d = _load_ffi_image( + telescopes[-1], + f, + extension, + cutout_size, + cutout_origin, + return_coords=True, + ) + imgs.append(f2d) + else: + imgs.append( + _load_ffi_image( + telescopes[-1], f, extension, cutout_size, cutout_origin + ) + ) + if telescopes[-1].lower() in ["tess"]: + imgs_err.append( + _load_ffi_image(telescopes[-1], f, 2, cutout_size, cutout_origin) + ) + else: + imgs_err.append(imgs[-1] ** 0.5) # check for integrity of files, same telescope, all FFIs and same quarter/campaign if len(set(telescopes)) != 1: @@ -921,7 +939,7 @@ def _load_file(fname, extension=1): "MISSION", "DATSETNM", ] - meta = {k: header[k] for k in attrs if k in header.keys()} + meta = {k: primary_header[k] for k in attrs if k in primary_header.keys()} attrs = [ "RADESYS", "EQUINOX", @@ -939,11 +957,8 @@ def _load_file(fname, extension=1): tdx = np.argsort(times) times = times[tdx] - # remove overscan of image - row_2d, col_2d, flux_2d = _remove_overscan(meta["TELESCOP"], np.array(imgs)[tdx]) - # kepler FFIs have uncent maps stored in different files, so we use Poison noise as - # flux error for now. - flux_err_2d = np.sqrt(np.abs(flux_2d)) + imgs = np.asarray(imgs) + imgs_err = np.asarray(imgs_err) # convert to RA and Dec ra, dec = wcs.all_pix2world(np.vstack([col_2d.ravel(), row_2d.ravel()]).T, 0.0).T @@ -951,16 +966,16 @@ def _load_file(fname, extension=1): # doesn't exist or is wrong, it could produce RA Dec values out of bound. if ra.min() < 0.0 or ra.max() > 360 or dec.min() < -90 or dec.max() > 90: raise ValueError("WCS lead to out of bound RA and Dec coordinates.") - ra_2d = ra.reshape(flux_2d.shape[1:]) - dec_2d = dec.reshape(flux_2d.shape[1:]) + ra_2d = ra.reshape(col_2d.shape) + dec_2d = dec.reshape(col_2d.shape) - del hdul, header, hdr, imgs, ra, dec + del hdul, primary_header, hdr, ra, dec return ( wcs, times, - flux_2d, - flux_err_2d, + imgs, + imgs_err, ra_2d, dec_2d, col_2d, @@ -1016,125 +1031,6 @@ def _get_sources(ra, dec, wcs, img_limits=[[0, 0], [0, 0]], square=True, **kwarg return sources -def _do_image_cutout( - flux, flux_err, ra, dec, column, row, cutout_size=100, cutout_origin=[0, 0] -): - """ - Creates a cutout of the full image. Return data arrays corresponding to the cutout. - - Parameters - ---------- - flux : numpy.ndarray - Data array with Flux values, correspond to full size image. - flux_err : numpy.ndarray - Data array with Flux errors values, correspond to full size image. - ra : numpy.ndarray - Data array with RA values, correspond to full size image. - dec : numpy.ndarray - Data array with Dec values, correspond to full size image. - column : numpy.ndarray - Data array with pixel column values, correspond to full size image. - row : numpy.ndarray - Data array with pixel raw values, correspond to full size image. - cutout_size : int - Size in pixels of the cutout, assumedto be squared. Default is 100. - cutout_origin : tuple of ints - Origin of the cutout following matrix indexing. Default is [0 ,0]. - - Returns - ------- - flux : numpy.ndarray - Data array with Flux values of the cutout. - flux_err : numpy.ndarray - Data array with Flux errors values of the cutout. 
-    ra : numpy.ndarray
-        Data array with RA values of the cutout.
-    dec : numpy.ndarray
-        Data array with Dec values of the cutout.
-    column : numpy.ndarray
-        Data array with pixel column values of the cutout.
-    row : numpy.ndarray
-        Data array with pixel raw values of the cutout.
-    """
-    if (cutout_size + cutout_origin[0] <= flux.shape[1]) and (
-        cutout_size + cutout_origin[1] <= flux.shape[2]
-    ):
-        column = column[
-            cutout_origin[0] : cutout_origin[0] + cutout_size,
-            cutout_origin[1] : cutout_origin[1] + cutout_size,
-        ]
-        row = row[
-            cutout_origin[0] : cutout_origin[0] + cutout_size,
-            cutout_origin[1] : cutout_origin[1] + cutout_size,
-        ]
-        flux = flux[
-            :,
-            cutout_origin[0] : cutout_origin[0] + cutout_size,
-            cutout_origin[1] : cutout_origin[1] + cutout_size,
-        ]
-        flux_err = flux_err[
-            :,
-            cutout_origin[0] : cutout_origin[0] + cutout_size,
-            cutout_origin[1] : cutout_origin[1] + cutout_size,
-        ]
-        ra = ra[
-            cutout_origin[0] : cutout_origin[0] + cutout_size,
-            cutout_origin[1] : cutout_origin[1] + cutout_size,
-        ]
-        dec = dec[
-            cutout_origin[0] : cutout_origin[0] + cutout_size,
-            cutout_origin[1] : cutout_origin[1] + cutout_size,
-        ]
-    else:
-        raise ValueError("Cutout size is larger than image shape ", flux.shape)
-
-    return flux, flux_err, ra, dec, column, row
-
-
-def _remove_overscan(telescope, imgs):
-    """
-    Removes overscan of the CCD. Return the image data with overscan columns and rows
-    removed, also return 2D data arrays with pixel columns and row values.
-
-    Parameters
-    ----------
-    telescope : string
-        Name of the telescope.
-    imgs : numpy.ndarray
-        Array of 2D images to. Has shape of [n_times, image_height, image_width].
-
-    Returns
-    -------
-    row_2d : numpy.ndarray
-        Data array with pixel row values
-    col_2d : numpy.ndarray
-        Data array with pixel column values
-    flux_2d : numpy.ndarray
-        Data array with flux values
-    """
-    if telescope == "Kepler":
-        # CCD overscan for Kepler
-        r_min = 20
-        r_max = 1044
-        c_min = 12
-        c_max = 1112
-    elif telescope == "TESS":
-        # CCD overscan for TESS
-        r_min = 0
-        r_max = 2048
-        c_min = 45
-        c_max = 2093
-    else:
-        raise TypeError("File is not from Kepler or TESS mission")
-    # remove overscan
-    row_2d, col_2d = np.mgrid[: imgs[0].shape[0], : imgs[0].shape[1]]
-    col_2d = col_2d[r_min:r_max, c_min:c_max]
-    row_2d = row_2d[r_min:r_max, c_min:c_max]
-    flux_2d = imgs[:, r_min:r_max, c_min:c_max]
-
-    return row_2d, col_2d, flux_2d
-
-
 def _compute_coordinate_offset(ra, dec, flux, sources, plot=True):
     """
     Compute coordinate offsets if the RA Dec of objects in source catalog don't align
diff --git a/src/psfmachine/machine.py b/src/psfmachine/machine.py
index f02dfde..f347d04 100644
--- a/src/psfmachine/machine.py
+++ b/src/psfmachine/machine.py
@@ -17,6 +17,7 @@
     sparse_lessthan,
     _combine_A,
     threshold_bin,
+    _find_uncontaminated_pixels,
 )
 from .aperture import optimize_aperture, compute_FLFRCSAP, compute_CROWDSAP
 
@@ -124,6 +125,8 @@ def __init__(
         phi: numpy.ndarray
             Angle between pixel and source coordinates (polar coordinates), in units of
             radians
+        rough_mask: scipy.sparse.csr_matrix
+            Sparse mask matrix with pixels that are close to sources, a simple round mask
         source_mask: scipy.sparse.csr_matrix
            Sparse mask matrix with pixels that contain flux from sources
         uncontaminated_source_mask: scipy.sparse.csr_matrix
@@ -166,12 +169,19 @@ def __init__(
         self.rmax = rmax
         self.cut_r = cut_r
         self.sparse_dist_lim = sparse_dist_lim * u.arcsecond
-        self.time_corrector = "polynomial"
+        self.time_corrector = "centroid"
         # if pos_corr, then we can smooth the vector with a Gaussian kernel of size
         # poscorr_filter_size, if this is < 0.5 -> no smoothing, default is 12
         # because of 6hr-CDPP
         self.poscorr_filter_size = 12
-        self.cartesian_knot_spacing = "sqrt"
+        self.cartesian_knot_spacing = "linear"
+        self.pixel_scale = (
+            np.hypot(
+                np.min(np.abs(np.diff(self.ra))), np.min(np.abs(np.diff(self.dec)))
+            )
+            * u.deg
+        ).to(u.arcsecond)
+
         # disable tqdm progress bar when running in HPC
         self.quiet = False
 
@@ -184,13 +194,17 @@
         self.nt = len(self.time)
         self.npixels = self.flux.shape[1]
 
+        self.source_flux_estimates = np.copy(np.asarray(self.sources.phot_g_mean_flux))
         # Hardcoded: sparse implementation is efficient when nsources * npixels < 1e7
         # (JMP profile this)
         # https://github.com/SSDataLab/psfmachine/pull/17#issuecomment-866382898
-        if self.nsources * self.npixels < 1e7:
-            self._create_delta_arrays()
-        else:
-            self._create_delta_sparse_arrays()
+        self.ra_centroid, self.dec_centroid = np.zeros((2)) * u.deg
+        self._update_delta_arrays()
+        self._get_centroid()
+        self._update_delta_arrays()
+        self._get_source_mask()
+        # self._update_delta_arrays()
+        return
 
     @property
     def shape(self):
@@ -199,49 +213,95 @@
 
     def __repr__(self):
         return f"Machine (N sources, N times, N pixels): {self.shape}"
 
-    def _create_delta_arrays(self, centroid_offset=[0, 0]):
+    @property
+    def dx(self):
+        """Delta RA, corrected for centroid shift"""
+        if not sparse.issparse(self.dra):
+            return self.dra - self.ra_centroid.value
+        else:
+            ra_offset = sparse.csr_matrix(
+                (
+                    np.repeat(
+                        self.ra_centroid.value,
+                        self.dra.data.shape,
+                    ),
+                    (self.dra.nonzero()),
+                ),
+                shape=self.dra.shape,
+                dtype=float,
+            )
+            return self.dra - ra_offset
+
+    @property
+    def dy(self):
+        """Delta Dec, corrected for centroid shift"""
+        if not sparse.issparse(self.ddec):
+            return self.ddec - self.dec_centroid.value
+        else:
+            dec_offset = sparse.csr_matrix(
+                (
+                    np.repeat(
+                        self.dec_centroid.value,
+                        self.ddec.data.shape,
+                    ),
+                    (self.ddec.nonzero()),
+                ),
+                shape=self.ddec.shape,
+                dtype=float,
+            )
+            return self.ddec - dec_offset
+
+    def _update_delta_arrays(self, frame_indices="mean"):
+        """Update dra, ddec, r, and phi, using dense or sparse arrays depending
+        on the problem size."""
+        if self.nsources * self.npixels < 1e7:
+            self._update_delta_numpy_arrays(frame_indices=frame_indices)
+        else:
+            self._update_delta_sparse_arrays(frame_indices=frame_indices)
+
+    def _update_delta_numpy_arrays(self, frame_indices="mean"):
         """
         Creates the dra, ddec, r, and phi numpy ndarrays.
 
         Parameters
         ----------
-        centroid_offset : list
-            Centroid offset for [ra, dec] to be included in dra and ddec computation.
-            Default is [0, 0].
+        frame_indices : list or str
+            Frames used to compute the centroids; "mean" takes the mean of all the
+            centroids in `time_mask`.
+
         """
         # The distance in ra & dec from each source to each pixel
-        # when centroid offset is 0 (i.e. first time creating arrays) create delta
-        # arrays from scratch
-        if centroid_offset[0] == centroid_offset[1] == 0:
+        # when the delta arrays don't exist yet (i.e. the first call), create them
+        # from scratch
+
+        if not hasattr(self, "dra"):
             self.dra, self.ddec = np.asarray(
                 [
                     [
-                        self.ra - self.sources["ra"][idx] - centroid_offset[0],
-                        self.dec - self.sources["dec"][idx] - centroid_offset[1],
+                        self.ra - self.sources["ra"][idx],
+                        self.dec - self.sources["dec"][idx],
                     ]
                     for idx in range(len(self.sources))
                 ]
            ).transpose(1, 0, 2)
-            self.dra = self.dra * (u.deg)
-            self.ddec = self.ddec * (u.deg)
-        # when offsets are != 0 (i.e. updating dra and ddec arrays) we just substract
-        # the ofsets avoiding the for loop
-        else:
-            self.dra -= centroid_offset[0] * u.deg
-            self.ddec -= centroid_offset[1] * u.deg
+            # self.dra = self.dra * (u.deg)
+            # self.ddec = self.ddec * (u.deg)
 
         # conversion to polar coordinates
-        self.r = np.hypot(self.dra, self.ddec).to("arcsec")
-        self.phi = np.arctan2(self.ddec, self.dra)
+        self.r = np.hypot(
+            self.dx * (u.deg.to(u.arcsecond)),
+            self.dy * (u.deg.to(u.arcsecond)),
+        )
+        self.phi = np.arctan2(
+            self.dy,
+            self.dx,
+        )
 
-    def _create_delta_sparse_arrays(self, dist_lim=40, centroid_offset=[0, 0]):
+    def _update_delta_sparse_arrays(self, frame_indices="mean", dist_lim=50):
         """
         Creates dra, ddec, r and phi arrays as sparse arrays to be used for dense data,
         e.g. Kepler FFIs or cluster fields. Assuming that there is no flux information
         further than `dist_lim` for a given source, we only keep pixels within the
         `dist_lim`.
         dra, ddec, ra, and phi are unitless because they are `sparse.csr_matrix`. But
-        keep same scale as '_create_delta_arrays()'.
+        keep same scale as '_update_delta_arrays()'.
         dra and ddec in deg. r in arcseconds and phi in rads
 
         Parameters
@@ -252,11 +312,11 @@
             Centroid offset for [ra, dec] to be included in dra and ddec computation.
             Default is [0, 0].
         """
+        if frame_indices == "mean":
+            frame_indices = np.where(self.time_mask)[0]
-        # If not centroid offsets or centroid correction are larget than a pixel,
-        # then we need to compute the sparse delta arrays from scratch
-        if (centroid_offset[0] == centroid_offset[1] == 0) or (
-            np.maximum(*np.abs(centroid_offset)) > 4 / 3600
-        ):
+        # if the delta arrays don't exist yet, compute them from scratch
+        if not hasattr(self, "dra"):
             # iterate over sources to only keep pixels within dist_lim
             # this is inefficient, could be done in a tiled manner? only for squared data
             dra, ddec, sparse_mask = [], [], []
             for i in tqdm(
                 range(len(self.sources)),
                 desc="Creating delta arrays",
                 disable=self.quiet,
             ):
-                dra_aux = self.ra - self.sources["ra"].iloc[i] - centroid_offset[0]
-                ddec_aux = self.dec - self.sources["dec"].iloc[i] - centroid_offset[1]
+                dra_aux = self.ra - self.sources["ra"].iloc[i]
+                ddec_aux = self.dec - self.sources["dec"].iloc[i]
                 box_mask = sparse.csr_matrix(
                     (np.abs(dra_aux) <= self.sparse_dist_lim.to("deg").value)
                     & (np.abs(ddec_aux) <= self.sparse_dist_lim.to("deg").value)
@@ -283,32 +343,16 @@
             sparse_mask.eliminate_zeros()
-        # if centroid correction is less than 1 pixel, then we just update dra and ddec
-        # sparse arrays and r and phi.
-        else:
-            self.dra = self.dra - sparse.csr_matrix(
-                (
-                    np.repeat(centroid_offset[0], self.dra.data.shape),
-                    (self.dra.nonzero()),
-                ),
-                shape=self.dra.shape,
-                dtype=float,
-            )
-            self.ddec = self.ddec - sparse.csr_matrix(
-                (
-                    np.repeat(centroid_offset[1], self.ddec.data.shape),
-                    (self.ddec.nonzero()),
-                ),
-                shape=self.ddec.shape,
-                dtype=float,
-            )
-            sparse_mask = self.dra.astype(bool)
+
+        # reuse the sparsity pattern of the existing delta arrays
+        sparse_mask = self.dra.astype(bool)
 
         # conversion to polar coordinates. We can't apply np.hypot or np.arctan2 to
         # sparse arrays. We keep track of non-zero index, do math in numpy space,
         # then rebuild r, phi as sparse.
             nnz_inds = sparse_mask.nonzero()
             # convert radial dist to arcseconds
-            r_vals = np.hypot(self.dra.data, self.ddec.data) * 3600
-            phi_vals = np.arctan2(self.ddec.data, self.dra.data)
+            r_vals = np.hypot(self.dx.data, self.dy.data) * (u.deg.to(u.arcsecond))
+            phi_vals = np.arctan2(self.dy.data, self.dx.data)
             self.r = sparse.csr_matrix(
                 (r_vals, (nnz_inds[0], nnz_inds[1])),
                 shape=sparse_mask.shape,
@@ -322,298 +366,104 @@
         del r_vals, phi_vals, nnz_inds, sparse_mask
         return
 
-    def _get_source_mask(
-        self,
-        upper_radius_limit=28.0,
-        lower_radius_limit=4.5,
-        upper_flux_limit=2e5,
-        lower_flux_limit=100,
-        correct_centroid_offset=True,
-        plot=False,
-    ):
-        """Find the pixel mask that identifies pixels with contributions from ANY NUMBER of Sources
+    def _get_source_mask(self, source_flux_limit=1):
+        """Find the round pixel mask that identifies pixels with contributions from ANY source.
 
-        Fits a simple polynomial model to the log of the pixel flux values, in radial dimension and source flux,
-        to find the optimum circular apertures for every source.
+        First makes a `rough_mask` that is ~2 pixels in radius, then fits a simple
+        linear trend in radius and flux. This linear trend is used to identify pixels
+        that are likely to be over the flux limit, the `source_mask`.
+
+        We then iterate, masking out contaminated pixels in the `source_mask`, to get
+        a better fit to the simple linear trend.
 
         Parameters
         ----------
-        upper_radius_limit: float
-            The radius limit at which we assume there is no flux from a source of any brightness (arcsec)
-        lower_radius_limit: float
-            The radius limit at which we assume there is flux from a source of any brightness (arcsec)
-        upper_flux_limit: float
-            The flux at which we assume as source is saturated
-        lower_flux_limit: float
-            The flux at which we assume a source is too faint to model
-        correct_centroid_offset: bool
-            Correct the dra, ddec arrays from centroid offsets. If centroid offsets are
-            larger than 1 arcsec, `source_mask` will be also updated.
-        plot: bool
-            Whether to show diagnostic plot. Default is False
+        source_flux_limit: float
+            Lower limit on the flux a pixel is predicted to receive from a source for
+            it to be included in that source's `source_mask`.
         """
-
-        if not hasattr(self, "source_flux_estimates"):
-            # gaia estimate flux values per pixel to be used as flux priors
-            self.source_flux_estimates = np.copy(
-                np.asarray(self.sources.phot_g_mean_flux)
-            )
-        # We will use the radius a lot, this is for readibility
-        # don't do if sparse array
-        if isinstance(self.r, u.quantity.Quantity):
-            r = self.r.value
+        self.radius = 2 * self.pixel_scale.to(u.arcsecond).value
+        if not sparse.issparse(self.r):
+            self.rough_mask = sparse.csr_matrix(self.r < self.radius)
         else:
-            r = self.r
-
-        # The average flux, which we assume is a good estimate of the whole stack of images
-        max_flux = np.nanmax(self.flux[self.time_mask], axis=0)
-
-        # Mask out sources that are above the flux limit, and pixels above the radius limit
-        source_rad = 0.5 * np.log10(self.source_flux_estimates) ** 1.5 + 3
-        # temp_mask for the sparse array case should also be a sparse matrix. Then it is
-        # applied to r, max_flux, and, source_flux_estimates to be used later.
- # Numpy array case: - if not isinstance(r, sparse.csr_matrix): - # First we make a guess that each source has exactly the gaia flux - source_flux_estimates = np.asarray(self.sources.phot_g_mean_flux)[ - :, None - ] * np.ones((self.nsources, self.npixels)) - temp_mask = (r < source_rad[:, None]) & ( - source_flux_estimates < upper_flux_limit - ) - temp_mask &= temp_mask.sum(axis=0) == 1 - - # apply temp_mask to r - r_temp_mask = r[temp_mask] - - # log of flux values - f = np.log10((temp_mask.astype(float) * max_flux)) - f_temp_mask = f[temp_mask] - # weights = ( - # (self.flux_err ** 0.5).sum(axis=0) ** 0.5 / self.flux.shape[0] - # ) * temp_mask - - # flux estimates - mf = np.log10(source_flux_estimates[temp_mask]) - # sparse array case: - else: - source_flux_estimates = self.r.astype(bool).multiply( - self.source_flux_estimates[:, None] - ) - temp_mask = sparse_lessthan(r, source_rad) - temp_mask = temp_mask.multiply( - sparse_lessthan(source_flux_estimates, upper_flux_limit) - ).tocsr() - temp_mask = temp_mask.multiply(temp_mask.sum(axis=0) == 1).tocsr() - temp_mask.eliminate_zeros() - - f = np.log10(temp_mask.astype(float).multiply(max_flux).data) - k = np.isfinite(f) - f_temp_mask = f[k] - r_temp_mask = temp_mask.astype(float).multiply(r).data[k] - mf = np.log10( - temp_mask.astype(float).multiply(source_flux_estimates).data[k] + self.rough_mask = sparse_lessthan(self.r, self.radius) + self.source_mask = self.rough_mask.copy() + self.source_mask.eliminate_zeros() + self.uncontaminated_source_mask = _find_uncontaminated_pixels(self.source_mask) + for count in [0, 1]: + # self._get_centroid() + # self._update_delta_arrays() + + mask = self.uncontaminated_source_mask + r = mask.multiply(self.r).data + max_f = np.log10( + mask.astype(float) + .multiply(np.nanmax(self.flux[self.time_mask], axis=0)) + .multiply(1 / self.source_flux_estimates[:, None]) + .data ) - - # Model is polynomial in r and log of the flux estimate. 
- # Here I'm using a 1st order polynomial, to ensure it's monatonic in each dimension - A = np.vstack( - [ - r_temp_mask ** 0, - r_temp_mask, - # r_temp_mask ** 2, - r_temp_mask ** 0 * mf, - r_temp_mask * mf, - # r_temp_mask ** 2 * mf, - # r_temp_mask ** 0 * mf ** 2, - # r_temp_mask * mf ** 2, - # r_temp_mask ** 2 * mf ** 2, - ] - ).T - - # Iteratively fit - k = np.isfinite(f_temp_mask) - for count in [0, 1, 2]: - sigma_w_inv = A[k].T.dot(A[k]) - B = A[k].T.dot(f_temp_mask[k]) - w = np.linalg.solve(sigma_w_inv, B) - res = np.ma.masked_array(f_temp_mask, ~k) - A.dot(w) - k &= ~sigma_clip(res, sigma=3).mask - - # Now find the radius and source flux at which the model reaches the flux limit - test_f = np.linspace( - np.log10(self.source_flux_estimates.min()), - np.log10(self.source_flux_estimates.max()), - 100, - ) - test_r = np.arange(lower_radius_limit, upper_radius_limit, 0.25) - test_r2, test_f2 = np.meshgrid(test_r, test_f) - - test_val = ( - np.vstack( + rbins = np.linspace(0, self.radius * 5, 50) + masks = np.asarray( [ - test_r2.ravel() ** 0, - test_r2.ravel(), - # test_r2.ravel() ** 2, - test_r2.ravel() ** 0 * test_f2.ravel(), - test_r2.ravel() * test_f2.ravel(), - # test_r2.ravel() ** 2 * test_f2.ravel(), - # test_r2.ravel() ** 0 * test_f2.ravel() ** 2, - # test_r2.ravel() * test_f2.ravel() ** 2, - # test_r2.ravel() ** 2 * test_f2.ravel() ** 2, + (r > rbins[idx]) & (r <= rbins[idx + 1]) + for idx in range(len(rbins) - 1) ] ) - .T.dot(w) - .reshape(test_r2.shape) - ) - l = np.zeros(len(test_f)) * np.nan - for idx in range(len(test_f)): - loc = np.where(10 ** test_val[idx] < lower_flux_limit)[0] - if len(loc) > 0: - l[idx] = test_r[loc[0]] - ok = np.isfinite(l) - source_radius_limit = np.polyval( - np.polyfit(test_f[ok], l[ok], 1), np.log10(self.source_flux_estimates) - ) - source_radius_limit[ - source_radius_limit > upper_radius_limit - ] = upper_radius_limit - source_radius_limit[ - source_radius_limit < lower_radius_limit - ] = lower_radius_limit - - # Here we set the radius for each source. We add two pixels, to be generous - self.radius = source_radius_limit + 2 - - # This sparse mask is one where where is ANY number of sources in a pixel - if not isinstance(self.r, sparse.csr_matrix): - self.source_mask = sparse.csr_matrix(self.r.value < self.radius[:, None]) - else: - # for a sparse matrix doing < self.radius is not efficient, it - # considers all zero values in the sparse matrix and set them to True. - # this is a workaround to this problem. - self.source_mask = sparse_lessthan(r, self.radius) - - self._get_uncontaminated_pixel_mask() - - # Now we can update the r and phi estimates, allowing for a slight centroid - # calculate image centroids and correct dra,ddec for offset. - if correct_centroid_offset: - self._get_centroids() - # print(self.ra_centroid_avg.to("arcsec"), self.dec_centroid_avg.to("arcsec")) - # re-estimate dra, ddec with centroid shifts, check if sparse case applies. 
- # Hardcoded: sparse implementation is efficient when nsourxes * npixels < 1e7 - # (JMP profile this) - # https://github.com/SSDataLab/psfmachine/pull/17#issuecomment-866382898 - if self.nsources * self.npixels < 1e7: - self._create_delta_arrays( - centroid_offset=[ - self.ra_centroid_avg.value, - self.dec_centroid_avg.value, - ] - ) + fbins = np.asarray([np.nanpercentile(max_f[m], 20) for m in masks]) + rbins = rbins[1:] - np.median(np.diff(rbins)) + k = np.isfinite(fbins) + if not k.any(): + raise ValueError("Can not find source mask") + l = np.polyfit(rbins[k], fbins[k], 1) + + if sparse.issparse(r): + mean_model = self.r.copy() + mean_model.data = 10 ** np.polyval(l, mean_model.data) + self.source_mask = ( + mean_model.multiply(self.source_flux_estimates[:, None]) + ) > source_flux_limit else: - self._create_delta_sparse_arrays( - centroid_offset=[ - self.ra_centroid_avg.value, - self.dec_centroid_avg.value, - ] + mean_model = 10 ** np.polyval(l, self.r) + self.source_mask = ( + sparse.csr_matrix(mean_model * self.source_flux_estimates[:, None]) + > source_flux_limit ) - # if centroid offset id larger than 1" then we need to recalculate the - # source mask to include/reject correct pixels. - # this 1 arcsec limit only works for Kepler/K2 - if ( - np.abs(self.ra_centroid_avg.to("arcsec").value) > 1 - or np.abs(self.dec_centroid_avg.to("arcsec").value) > 1 - ): - self._get_source_mask(correct_centroid_offset=False) - - if plot: - k = np.isfinite(f_temp_mask) - # Make a nice diagnostic plot - fig, ax = plt.subplots(1, 2, figsize=(8, 3), facecolor="white") - - ax[0].scatter(r_temp_mask[k], f_temp_mask[k], s=0.4, c="k", label="Data") - ax[0].scatter(r_temp_mask[k], A[k].dot(w), c="r", s=0.4, label="Model") - ax[0].set( - xlabel=("Radius from Source [arcsec]"), - ylabel=("log$_{10}$ Kepler Flux"), - ) - ax[0].legend(frameon=True) - - im = ax[1].pcolormesh( - test_f2, - test_r2, - 10 ** test_val, - vmin=0, - vmax=lower_flux_limit * 2, - cmap="viridis", - shading="auto", - ) - line = np.polyval(np.polyfit(test_f[ok], l[ok], 1), test_f) - line[line > upper_radius_limit] = upper_radius_limit - line[line < lower_radius_limit] = lower_radius_limit - ax[1].plot(test_f, line, color="r", label="Best Fit PSF Edge") - ax[1].set_ylim(lower_radius_limit, upper_radius_limit) - ax[1].legend(frameon=True) - - cbar = plt.colorbar(im, ax=ax) - cbar.set_label("PSF Flux [$e^-s^{-1}$]") - - ax[1].set( - ylabel=("Radius from Source [arcsecond]"), - xlabel=("log$_{10}$ Gaia Source Flux"), + self.uncontaminated_source_mask = _find_uncontaminated_pixels( + self.source_mask ) - return fig return - def _get_uncontaminated_pixel_mask(self): - """ - creates a mask of shape nsources x npixels where targets are not contaminated. - This mask is used to select pixels to build the PSF model. - """ - - # This could be a property, but it is a pain to calculate on the fly, perhaps with lru_cache - self.uncontaminated_source_mask = self.source_mask.multiply( - np.asarray(self.source_mask.sum(axis=0) == 1)[0] - ).tocsr() - # have to remove leaked zeros - self.uncontaminated_source_mask.eliminate_zeros() - - # # reduce to good pixels - # self.uncontaminated_pixel_mask = sparse.csr_matrix( - # self.uncontaminated_source_mask.sum(axis=0) > 0 - # ) - - return - - # CH: We're not currently using this, but it might prove useful later so I will leave for now - def _get_centroids(self): - """ - Find the ra and dec centroid of the image, at each time. 
- """ - # centroids are astropy quantities - self.ra_centroid = np.zeros(self.nt) - self.dec_centroid = np.zeros(self.nt) - dra_m = self.uncontaminated_source_mask.multiply(self.dra).data - ddec_m = self.uncontaminated_source_mask.multiply(self.ddec).data - for t in range(self.nt): - wgts = self.uncontaminated_source_mask.multiply( - np.sqrt(np.abs(self.flux[t])) - ).data - # mask out non finite values and background pixels - k = (np.isfinite(wgts)) & ( - self.uncontaminated_source_mask.multiply(self.flux[t]).data > 100 + def _get_centroid(self, plot=False): + radius = 1.5 * self.pixel_scale.to(u.arcsecond).value + if not sparse.issparse(self.r): + mask = sparse.csr_matrix(self.r < radius) + else: + mask = sparse_lessthan(self.r, radius) + mask = _find_uncontaminated_pixels(mask) + x, y = ( + mask.multiply(self.dra).data, + mask.multiply(self.ddec).data, + ) + c = ( + mask.multiply(self.flux[self.time_mask].mean(axis=0)) + .multiply(1 / self.source_flux_estimates[:, None]) + .data + ) + bm, nx, ny, nz, nze = threshold_bin(x, y, c, bins=20) + self.ra_centroid, self.dec_centroid = ( + np.average(nx, weights=(nz / nze)) * u.deg, + np.average(ny, weights=(nz / nze)) * u.deg, + ) + if plot: + fig = plt.figure(figsize=(5, 5)) + plt.scatter(nx, ny, c=nz / nze, s=200) + plt.scatter( + self.ra_centroid.value, + self.dec_centroid.value, + c="r", + marker="*", + s=100, ) - self.ra_centroid[t] = np.average(dra_m[k], weights=wgts[k]) - self.dec_centroid[t] = np.average(ddec_m[k], weights=wgts[k]) - del dra_m, ddec_m - self.ra_centroid *= u.deg - self.dec_centroid *= u.deg - self.ra_centroid_avg = self.ra_centroid.mean() - self.dec_centroid_avg = self.dec_centroid.mean() - - return + return fig def _time_bin(self, npoints=200, downsample=False): """Bin the flux data down in time. 
If using `pos_corr`s as corrector, it will @@ -659,8 +509,8 @@ def _time_bin(self, npoints=200, downsample=False): mpc2 = np.nanmedian(self.pos_corr2, axis=0) else: # if usig centroids need to convert to pixels - mpc1 = self.ra_centroid.to("arcsec").value / 4 - mpc2 = self.dec_centroid.to("arcsec").value / 4 + mpc1 = self.ra_centroids.to("arcsec").value / 4 + mpc2 = self.dec_centroids.to("arcsec").value / 4 # find poscorr discontinuity in each axis grads1 = np.gradient(mpc1, self.time) @@ -810,6 +660,23 @@ def _time_bin(self, npoints=200, downsample=False): return ta, tm, fm_raw, fm, fem + def _time_matrix(self, mask): + dx, dy = ( + mask.multiply(self.dx), + mask.multiply(self.dy), + ) + dx = dx.data * u.deg.to(u.arcsecond) + dy = dy.data * u.deg.to(u.arcsecond) + + A_c = _make_A_cartesian( + dx, + dy, + n_knots=self.n_time_knots, + radius=self.time_radius, + spacing=self.cartesian_knot_spacing, + ) + return A_c + def build_time_model(self, plot=False, downsample=False): """ Builds a time model that moves the PRF model to account for the scene movement @@ -855,29 +722,20 @@ def build_time_model(self, plot=False, downsample=False): ) = self._time_bin(npoints=self.n_time_points, downsample=downsample) self._whitened_time = time_original - # not necessary to take value from Quantity to do .multiply() - dx, dy = ( - self.uncontaminated_source_mask.multiply(self.dra), - self.uncontaminated_source_mask.multiply(self.ddec), + A_c = self._time_matrix(self.uncontaminated_source_mask) + A2 = sparse.vstack( + [A_c] * time_binned.shape[0], + format="csr", ) - dx = dx.data * u.deg.to(u.arcsecond) - dy = dy.data * u.deg.to(u.arcsecond) - A_c = _make_A_cartesian( - dx, - dy, - n_knots=self.n_time_knots, - radius=self.time_radius, - spacing=self.cartesian_knot_spacing, - ) - A2 = sparse.vstack([A_c] * time_binned.shape[0], format="csr") - # Cartesian spline with time dependence if hasattr(self, "pos_corr1") and self.time_corrector in [ "pos_corr", "centroid", ]: # Cartesian spline with poscor dependence - A3 = _combine_A(A2, poscorr=[poscorr1_binned, poscorr2_binned]) + A3 = _combine_A( + A2, poscorr=[poscorr1_binned, poscorr2_binned], time=time_binned + ) else: # Cartesian spline with time dependence A3 = _combine_A(A2, time=time_binned) @@ -961,22 +819,11 @@ def plot_time_model(self): flux_err_binned, ) = self._time_bin(npoints=self.n_time_points) - # not necessary to take value from Quantity to do .multiply() - dx, dy = ( - self.uncontaminated_source_mask.multiply(self.dra), - self.uncontaminated_source_mask.multiply(self.ddec), - ) - dx = dx.data * u.deg.to(u.arcsecond) - dy = dy.data * u.deg.to(u.arcsecond) - - A_c = _make_A_cartesian( - dx, - dy, - n_knots=self.n_time_knots, - radius=self.time_radius, - spacing=self.cartesian_knot_spacing, + A_c = self._time_matrix(self.uncontaminated_source_mask) + A2 = sparse.vstack( + [A_c] * time_binned.shape[0], + format="csr", ) - A2 = sparse.vstack([A_c] * time_binned.shape[0], format="csr") # Cartesian spline with time dependence # Cartesian spline with time dependence if hasattr(self, "pos_corr1") and self.time_corrector in [ @@ -984,11 +831,19 @@ def plot_time_model(self): "centroid", ]: # Cartesian spline with poscor dependence - A3 = _combine_A(A2, poscorr=[poscorr1_binned, poscorr2_binned]) + A3 = _combine_A( + A2, poscorr=[poscorr1_binned, poscorr2_binned], time=time_binned + ) else: # Cartesian spline with time dependence A3 = _combine_A(A2, time=time_binned) + dx, dy = ( + self.uncontaminated_source_mask.multiply(self.dx).data + * 
(u.deg.to(u.arcsecond)), + self.uncontaminated_source_mask.multiply(self.dy).data + * (u.deg.to(u.arcsecond)), + ) model = A3.dot(self.time_model_w).reshape(flux_binned.shape) + 1 fig, ax = plt.subplots(2, 2, figsize=(7, 6), facecolor="w") k1 = self._time_masked.reshape(flux_binned.shape)[0] @@ -1062,45 +917,48 @@ def build_shape_model( **kwargs Keyword arguments to be passed to `_get_source_mask()` """ - # gaia estimate flux values per pixel to be used as flux priors self.source_flux_estimates = np.copy(np.asarray(self.sources.phot_g_mean_flux)) # Mask of shape nsources x number of pixels, one where flux from a # source exists - if not hasattr(self, "source_mask"): - self._get_source_mask(**kwargs) + if not hasattr(self, "uncontaminated_source_mask"): + mask = self.rough_mask + else: + mask = self.uncontaminated_source_mask + # self._get_source_mask(**kwargs) # Mask of shape npixels (maybe by nt) where not saturated, not faint, # not contaminated etc - self._get_uncontaminated_pixel_mask() + # self._get_uncontaminated_pixel_mask() # for iter in range(niters): flux_estimates = self.source_flux_estimates[:, None] if frame_index == "mean": f = (self.flux[self.time_mask]).mean(axis=0) - elif isinstance(frame_index, int): + elif frame_index == "max": + f = (self.flux[self.time_mask]).max(axis=0) + elif isinstance(frame_index, (int, np.int64, np.int32, np.int16)): f = self.flux[frame_index] + else: + raise ValueError(f"frame_index {frame_index} not valid") # f, fe = (self.flux[self.time_mask]).mean(axis=0), ( # (self.flux_err[self.time_mask] ** 2).sum(axis=0) ** 0.5 # ) / (self.nt) mean_f = np.log10( - self.uncontaminated_source_mask.astype(float) - .multiply(f) - .multiply(1 / flux_estimates) - .data + mask.astype(float).multiply(f).multiply(1 / flux_estimates).data ) # Actual Kepler errors cause all sorts of instability # mean_f_err = ( - # self.uncontaminated_source_mask.astype(float) + # mask.astype(float) # .multiply(fe / (f * np.log(10))) # .multiply(1 / flux_estimates) # .data # ) # We only need these weights for the wings, so we'll use poisson noise mean_f_err = ( - self.uncontaminated_source_mask.astype(float) + mask.astype(float) .multiply((f ** 0.5) / (f * np.log(10))) .multiply(1 / flux_estimates) .data @@ -1108,8 +966,8 @@ def build_shape_model( mean_f_err.data = np.abs(mean_f_err.data) # take value from Quantity is not necessary - phi_b = self.uncontaminated_source_mask.multiply(self.phi).data - r_b = self.uncontaminated_source_mask.multiply(self.r).data + phi_b = mask.multiply(self.phi).data + r_b = mask.multiply(self.r).data if bin_data: # number of bins is hardcoded to work with FFI or TPFs accordingly @@ -1139,7 +997,7 @@ def build_shape_model( n_phi_knots=self.n_phi_knots, ) prior_sigma = np.ones(A.shape[1]) * 10 - prior_mu = np.zeros(A.shape[1]) - 10 + prior_mu = np.zeros(A.shape[1]) - 100 nan_mask = np.isfinite(mean_f.ravel()) @@ -1154,7 +1012,17 @@ def build_shape_model( errors=True, ) - bad = sigma_clip(mean_f.ravel() - A.dot(psf_w), sigma=5).mask + self.psf_w = psf_w + self.psf_w_err = psf_w_err + + # # We build the mean model because this builds our source mask, + # # and cuts out pixels that have really low flux values or are in the wings + # self._get_mean_model() + # self.source_mask = mean_model != 0 + # self.source_mask.eliminate_zeros() + # self.uncontaminated_source_mask = _find_uncontaminated_pixels(self.source_mask) + + bad = sigma_clip((mean_f.ravel() - A.dot(psf_w)), sigma=5).mask psf_w, psf_w_err = solve_linear_model( A, @@ -1171,91 +1039,34 @@ def 
build_shape_model( # We then build the same design matrix for all pixels with flux self._get_mean_model() + # self.source_mask = mean_model != 0 + # self.source_mask.eliminate_zeros() + # self.uncontaminated_source_mask = _find_uncontaminated_pixels(self.source_mask) + + # self._update_source_mask_remove_bkg_pixels() # remove background pixels and recreate mean model - self._update_source_mask_remove_bkg_pixels( - flux_cut_off=flux_cut_off, frame_index=frame_index - ) + # self._update_source_mask_remove_bkg_pixels( + # flux_cut_off=flux_cut_off, frame_index=frame_index + # ) if plot: - return self.plot_shape_model(frame_index=frame_index, bin_data=bin_data) + return self.plot_shape_model(self.source_mask, frame_index=frame_index) return - def _update_source_mask_remove_bkg_pixels(self, flux_cut_off=1, frame_index="mean"): - """ - Update the `source_mask` to remove pixels that do not contribuite to the PRF - shape. - First, re-estimate the source flux usign the precomputed `mean_model`. - This re-estimation is used to remove sources with bad prediction and update - the `source_mask` by removing background pixels that do not contribuite to - the PRF shape. - Pixels with normalized flux > `flux_cut_off` are kept. - - Parameters - ---------- - flux_cut_off : float - Lower limit for the normalized flux predicted from the mean model. - frame_index : string or int - The frame index to be used, if "mean" then use the - mean value across time - """ - - # Re-estimate source flux - # ----- - prior_mu = self.source_flux_estimates - prior_sigma = ( - np.ones(self.mean_model.shape[0]) * 10 * self.source_flux_estimates - ) - - if frame_index == "mean": - f, fe = (self.flux).mean(axis=0), ( - (self.flux_err ** 2).sum(axis=0) ** 0.5 - ) / (self.nt) - elif isinstance(frame_index, int): - f, fe = self.flux[frame_index], self.flux_err[frame_index] - - X = self.mean_model.copy() - X = X.T - - sigma_w_inv = X.T.dot(X.multiply(1 / fe[:, None] ** 2)).toarray() - sigma_w_inv += np.diag(1 / (prior_sigma ** 2)) - B = X.T.dot((f / fe ** 2)) - B += prior_mu / (prior_sigma ** 2) - ws = np.linalg.solve(sigma_w_inv, B) - werrs = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5 - - # ----- - - # Rebuild source mask - ok = np.abs(ws - self.source_flux_estimates) / werrs > 3 - ok &= ((ws / self.source_flux_estimates) < 10) & ( - (self.source_flux_estimates / ws) < 10 - ) - ok &= ws > 10 - ok &= werrs > 0 - - self.source_flux_estimates[ok] = ws[ok] - - self.source_mask = ( - self.mean_model.multiply( - self.mean_model.T.dot(self.source_flux_estimates) - ).tocsr() - > flux_cut_off - ) - - # Recreate uncontaminated mask - self._get_uncontaminated_pixel_mask() - # self.uncontaminated_source_mask = self.uncontaminated_source_mask.multiply( - # (self.mean_model.max(axis=1) < 1) - # ) + def _get_mean_model( + self, mask=None # , relative_flux_limit=0.001, absolute_flux_limit=1 + ): - # Recreate mean model! 
- self._get_mean_model() + if mask is None: + if not hasattr(self, "source_mask"): + mask = self.rough_mask + else: + mask = self.source_mask - def _get_mean_model(self): """Convenience function to make the scene model""" Ap = _make_A_polar( - self.source_mask.multiply(self.phi).data, - self.source_mask.multiply(self.r).data, + mask.multiply(self.phi).data, + mask.multiply(self.r).data, rmin=self.rmin, rmax=self.rmax, cut_r=self.cut_r, @@ -1267,11 +1078,16 @@ def _get_mean_model(self): mean_model = sparse.csr_matrix(self.r.shape) m = 10 ** Ap.dot(self.psf_w) m[~np.isfinite(m)] = 0 - mean_model[self.source_mask] = m + mean_model[mask] = m + # mean_model = mean_model.multiply(mean_model > relative_flux_limit) + # mean_model = mean_model.multiply( + # mean_model.multiply(self.source_flux_estimates[:, None]) + # > absolute_flux_limit + # ) mean_model.eliminate_zeros() self.mean_model = mean_model - def plot_shape_model(self, radius=20, frame_index="mean", bin_data=False): + def plot_shape_model(self, mask, bin_data=False, radius=None, frame_index="max"): """ Diagnostic plot of shape model. @@ -1288,29 +1104,39 @@ def plot_shape_model(self, radius=20, frame_index="mean", bin_data=False): fig : matplotlib.Figure Figure. """ - + # mask = plot_mask.multiply(self.mean_model != 0) + # mask.eliminate_zeros() if frame_index == "mean": mean_f = np.log10( - self.uncontaminated_source_mask.astype(float) + mask.astype(float) .multiply(self.flux[self.time_mask].mean(axis=0)) .multiply(1 / self.source_flux_estimates[:, None]) .data ) + if frame_index == "max": + mean_f = np.log10( + mask.astype(float) + .multiply(self.flux[self.time_mask].max(axis=0)) + .multiply(1 / self.source_flux_estimates[:, None]) + .data + ) elif isinstance(frame_index, int): mean_f = np.log10( - self.uncontaminated_source_mask.astype(float) + mask.astype(float) .multiply(self.flux[frame_index]) .multiply(1 / self.source_flux_estimates[:, None]) .data ) - + vmin, vmax = np.nanpercentile(mean_f, [10, 90]) dx, dy = ( - self.uncontaminated_source_mask.multiply(self.dra), - self.uncontaminated_source_mask.multiply(self.ddec), + mask.multiply(self.dx), + mask.multiply(self.dy), ) dx = dx.data * u.deg.to(u.arcsecond) dy = dy.data * u.deg.to(u.arcsecond) - + phi, r = np.arctan2(dy, dx), np.hypot(dx, dy) + if radius is None: + radius = np.nanmax(r) if bin_data: nbins = 30 if mean_f.shape[0] <= 5e3 else 90 _, dx, dy, mean_f, _ = threshold_bin( @@ -1319,7 +1145,7 @@ def plot_shape_model(self, radius=20, frame_index="mean", bin_data=False): fig, ax = plt.subplots(3, 2, figsize=(9, 10.5), constrained_layout=True) im = ax[0, 0].scatter( - dx, dy, c=mean_f, cmap="viridis", vmin=-3, vmax=-1, s=3, rasterized=True + dx, dy, c=mean_f, cmap="viridis", vmin=vmin, vmax=vmax, s=3, rasterized=True ) ax[0, 0].set( ylabel=r'$\delta y$ ["]', @@ -1341,9 +1167,8 @@ def plot_shape_model(self, radius=20, frame_index="mean", bin_data=False): color="tab:red", ) - phi, r = np.arctan2(dy, dx), np.hypot(dx, dy) im = ax[0, 1].scatter( - phi, r, c=mean_f, cmap="viridis", vmin=-3, vmax=-1, s=3, rasterized=True + phi, r, c=mean_f, cmap="viridis", vmin=vmin, vmax=vmax, s=3, rasterized=True ) ax[0, 1].set( ylabel='$r$ ["]', @@ -1361,13 +1186,14 @@ def plot_shape_model(self, radius=20, frame_index="mean", bin_data=False): n_r_knots=self.n_r_knots, n_phi_knots=self.n_phi_knots, ) + model = A.dot(self.psf_w) im = ax[1, 1].scatter( phi, r, - c=A.dot(self.psf_w), + c=model, cmap="viridis", - vmin=-3, - vmax=-1, + vmin=vmin, + vmax=vmax, s=3, rasterized=True, ) @@ -1381,10 
+1207,10 @@ def plot_shape_model(self, radius=20, frame_index="mean", bin_data=False): im = ax[1, 0].scatter( dx, dy, - c=A.dot(self.psf_w), + c=model, cmap="viridis", - vmin=-3, - vmax=-1, + vmin=vmin, + vmax=vmax, s=3, rasterized=True, ) @@ -1399,7 +1225,7 @@ def plot_shape_model(self, radius=20, frame_index="mean", bin_data=False): cbar = fig.colorbar(im, ax=ax[:2, 1], shrink=0.7, location="right") cbar.set_label("log$_{10}$ Normalized Flux") mean_f = 10 ** mean_f - model = 10 ** A.dot(self.psf_w) + model = 10 ** model im = ax[2, 0].scatter( dx, @@ -1452,6 +1278,21 @@ def fit_model(self, fit_va=False): model has to be built previously with `build_time_model`. """ + self._n_time_components = [ + 6 + if ( + ( + self.time_corrector + in [ + "pos_corr", + "centroid", + ] + ) + & (hasattr(self, "pos_corr1")) + ) + else 4 + ][0] + prior_mu = self.source_flux_estimates # np.zeros(A.shape[1]) prior_sigma = ( np.ones(self.mean_model.shape[0]) @@ -1471,21 +1312,8 @@ def fit_model(self, fit_va=False): ) # not necessary to take value from Quantity to do .multiply() - dx, dy = ( - self.source_mask.multiply(self.dra), - self.source_mask.multiply(self.ddec), - ) - dx = dx.data * u.deg.to(u.arcsecond) - dy = dy.data * u.deg.to(u.arcsecond) - - A_cp = _make_A_cartesian( - dx, - dy, - n_knots=self.n_time_knots, - radius=self.time_radius, - spacing=self.cartesian_knot_spacing, - ) - A_cp3 = sparse.hstack([A_cp, A_cp, A_cp, A_cp], format="csr") + A_cp = self._time_matrix(self.mean_model != 0) + A_cp3 = sparse.hstack([A_cp] * self._n_time_components, format="csr") self.ws_va = np.zeros((self.nt, self.mean_model.shape[0])) self.werrs_va = np.zeros((self.nt, self.mean_model.shape[0])) @@ -1518,18 +1346,23 @@ def fit_model(self, fit_va=False): np.array( [ 1, + self._whitened_time[tdx], + self._whitened_time[tdx] ** 2, self.pos_corr1_smooth[tdx], self.pos_corr2_smooth[tdx], self.pos_corr1_smooth[tdx] * self.pos_corr2_smooth[tdx], ] )[:, None] - * np.ones(A_cp3.shape[1] // 4) + * np.ones(A_cp3.shape[1] // self._n_time_components) ) else: # use time t_mult = np.hstack( - (self._whitened_time[tdx] ** np.arange(4))[:, None] - * np.ones(A_cp3.shape[1] // 4) + ( + self._whitened_time[tdx] + ** np.arange(self._n_time_components) + )[:, None] + * np.ones(A_cp3.shape[1] // self._n_time_components) ) X.data *= A_cp3.multiply(t_mult).dot(self.time_model_w) + 1 X = X.T @@ -1570,9 +1403,8 @@ def fit_model(self, fit_va=False): self.werrs[tdx] = np.linalg.inv(sigma_w_inv).diagonal() ** 0.5 self.model_flux[tdx] = X.dot(self.ws[tdx]) - nodata = np.asarray(self.source_mask.sum(axis=1))[:, 0] == 0 # These sources are poorly estimated - nodata |= (self.mean_model.max(axis=1) > 1).toarray()[:, 0] + nodata = (self.mean_model.max(axis=1) > 1).toarray()[:, 0] self.ws[:, nodata] *= np.nan self.werrs[:, nodata] *= np.nan diff --git a/src/psfmachine/superstamp.py b/src/psfmachine/superstamp.py index 1bc5791..1ada51d 100644 --- a/src/psfmachine/superstamp.py +++ b/src/psfmachine/superstamp.py @@ -15,7 +15,7 @@ from astropy.wcs import WCS import astropy.units as u -from .ffi import FFIMachine, _get_sources, _do_image_cutout +from .ffi import FFIMachine, _get_sources __all__ = ["SSMachine"] @@ -616,3 +616,76 @@ def _load_file(fname): poscorr2, meta, ) + + +def _do_image_cutout( + flux, flux_err, ra, dec, column, row, cutout_size=100, cutout_origin=[0, 0] +): + """ + Creates a cutout of the full image. Return data arrays corresponding to the cutout. 
+ Parameters + ---------- + flux : numpy.ndarray + Data array with Flux values, correspond to full size image. + flux_err : numpy.ndarray + Data array with Flux errors values, correspond to full size image. + ra : numpy.ndarray + Data array with RA values, correspond to full size image. + dec : numpy.ndarray + Data array with Dec values, correspond to full size image. + column : numpy.ndarray + Data array with pixel column values, correspond to full size image. + row : numpy.ndarray + Data array with pixel raw values, correspond to full size image. + cutout_size : int + Size in pixels of the cutout, assumedto be squared. Default is 100. + cutout_origin : tuple of ints + Origin of the cutout following matrix indexing. Default is [0 ,0]. + Returns + ------- + flux : numpy.ndarray + Data array with Flux values of the cutout. + flux_err : numpy.ndarray + Data array with Flux errors values of the cutout. + ra : numpy.ndarray + Data array with RA values of the cutout. + dec : numpy.ndarray + Data array with Dec values of the cutout. + column : numpy.ndarray + Data array with pixel column values of the cutout. + row : numpy.ndarray + Data array with pixel raw values of the cutout. + """ + if (cutout_size + cutout_origin[0] <= flux.shape[1]) and ( + cutout_size + cutout_origin[1] <= flux.shape[2] + ): + column = column[ + cutout_origin[0] : cutout_origin[0] + cutout_size, + cutout_origin[1] : cutout_origin[1] + cutout_size, + ] + row = row[ + cutout_origin[0] : cutout_origin[0] + cutout_size, + cutout_origin[1] : cutout_origin[1] + cutout_size, + ] + flux = flux[ + :, + cutout_origin[0] : cutout_origin[0] + cutout_size, + cutout_origin[1] : cutout_origin[1] + cutout_size, + ] + flux_err = flux_err[ + :, + cutout_origin[0] : cutout_origin[0] + cutout_size, + cutout_origin[1] : cutout_origin[1] + cutout_size, + ] + ra = ra[ + cutout_origin[0] : cutout_origin[0] + cutout_size, + cutout_origin[1] : cutout_origin[1] + cutout_size, + ] + dec = dec[ + cutout_origin[0] : cutout_origin[0] + cutout_size, + cutout_origin[1] : cutout_origin[1] + cutout_size, + ] + else: + raise ValueError("Cutout size is larger than image shape ", flux.shape) + + return flux, flux_err, ra, dec, column, row diff --git a/src/psfmachine/tpf.py b/src/psfmachine/tpf.py index 9da96bd..3f840dd 100644 --- a/src/psfmachine/tpf.py +++ b/src/psfmachine/tpf.py @@ -611,7 +611,6 @@ def load_shape_model(self, input=None, plot=False): # create source mask and uncontaminated pixel mask self._get_source_mask() - self._get_uncontaminated_pixel_mask() # open file hdu = fits.open(input) @@ -647,7 +646,7 @@ def load_shape_model(self, input=None, plot=False): # work in arcseconds self._get_mean_model() # remove background pixels and recreate mean model - self._update_source_mask_remove_bkg_pixels() + # self._update_source_mask_remove_bkg_pixels() if plot: return self.plot_shape_model() diff --git a/src/psfmachine/utils.py b/src/psfmachine/utils.py index df5356f..736a3b4 100644 --- a/src/psfmachine/utils.py +++ b/src/psfmachine/utils.py @@ -7,6 +7,7 @@ from scipy import sparse from patsy import dmatrix import pyia +import fitsio # size_limit is 1GB cache = diskcache.Cache(directory="~/.psfmachine-cache") @@ -197,32 +198,40 @@ def _make_A_polar(phi, r, cut_r=6, rmin=1, rmax=18, n_r_knots=12, n_phi_knots=15 def _make_A_cartesian(x, y, n_knots=10, radius=3.0, spacing="sqrt"): + # Must be odd + n_odd_knots = n_knots if n_knots % 2 == 1 else n_knots + 1 if spacing == "sqrt": - x_knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots) + x_knots = 
np.linspace(-np.sqrt(radius), np.sqrt(radius), n_odd_knots) x_knots = np.sign(x_knots) * x_knots ** 2 + y_knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_odd_knots) + y_knots = np.sign(y_knots) * y_knots ** 2 else: - x_knots = np.linspace(-radius, radius, n_knots) + x_knots = np.linspace(-radius, radius, n_odd_knots) + y_knots = np.linspace(-radius, radius, n_odd_knots) x_spline = sparse.csr_matrix( np.asarray( dmatrix( "bs(x, knots=knots, degree=3, include_intercept=True)", - {"x": list(x), "knots": x_knots}, + { + "x": list(np.hstack([x_knots.min(), x, x_knots.max()])), + "knots": x_knots, + }, ) - ) + )[1:-1] ) - if spacing == "sqrt": - y_knots = np.linspace(-np.sqrt(radius), np.sqrt(radius), n_knots) - y_knots = np.sign(y_knots) * y_knots ** 2 - else: - y_knots = np.linspace(-radius, radius, n_knots) y_spline = sparse.csr_matrix( np.asarray( dmatrix( "bs(x, knots=knots, degree=3, include_intercept=True)", - {"x": list(y), "knots": y_knots}, + { + "x": list(np.hstack([y_knots.min(), y, y_knots.max()])), + "knots": y_knots, + }, ) - ) + )[1:-1] ) + x_spline = x_spline[:, np.asarray(x_spline.sum(axis=0))[0] != 0] + y_spline = y_spline[:, np.asarray(y_spline.sum(axis=0))[0] != 0] X = sparse.hstack( [x_spline.multiply(y_spline[:, idx]) for idx in range(y_spline.shape[1])], format="csr", @@ -402,7 +411,7 @@ def sparse_lessthan(arr, limit): return masked_arr -def _combine_A(A, poscorr=None, time=None): +def _combine_A(A, time, poscorr=None): """ Combines a design matrix A (cartesian) with a time corrector type. If poscorr is provided, A will be combined with both axis of the pos corr as a @@ -424,14 +433,16 @@ def _combine_A(A, poscorr=None, time=None): A2 = sparse.hstack( [ A, + A.multiply(time.ravel()[:, None]), + A.multiply(time.ravel()[:, None] ** 2), A.multiply(poscorr[0].ravel()[:, None]), A.multiply(poscorr[1].ravel()[:, None]), - A.multiply((poscorr[0] * poscorr[1]).ravel()[:, None]), + A.multiply((poscorr[0].ravel() * poscorr[1].ravel())[:, None]), ], format="csr", ) return A2 - elif time is not None: + elif poscorr is None: # Cartesian spline with time dependence A2 = sparse.hstack( [ @@ -542,3 +553,65 @@ def threshold_bin(x, y, z, z_err=None, abs_thresh=10, bins=15, statistic=np.nanm np.hstack(new_z), np.hstack(new_z_err), ) + + +def _find_uncontaminated_pixels(mask): + """ + creates a mask of shape nsources x npixels where targets are not contaminated. + This mask is used to select pixels to build the PSF model. 
+ """ + + new_mask = mask.multiply(np.asarray(mask.sum(axis=0) == 1)[0]).tocsr() + new_mask.eliminate_zeros() + return new_mask + + +def _load_ffi_image( + telescope, + fname, + extension, + cutout_size=None, + cutout_origin=[0, 0], + return_coords=False, +): + """Use fitsio to load an image + + Parameters + ---------- + telescope: str + String for the telescope + fname: str + Path to the filename + extension: int + Extension to cut out of the image + """ + f = fitsio.FITS(fname)[extension] + if telescope.lower() == "kepler": + # CCD overscan for Kepler + r_min = 20 + r_max = 1044 + c_min = 12 + c_max = 1112 + elif telescope.lower() == "tess": + # CCD overscan for TESS + r_min = 0 + r_max = 2048 + c_min = 45 + c_max = 2093 + else: + raise TypeError("File is not from Kepler or TESS mission") + # If the image dimension is not the FFI shape, we change the r_max and c_max + dims = f.get_dims() + if dims != [r_max, c_max]: + r_max, c_max = np.asarray(dims) + r_min += cutout_origin[0] + c_min += cutout_origin[1] + if (r_min > r_max) | (c_min > c_max): + raise ValueError("`cutout_origin` must be within the image.") + if cutout_size is not None: + r_max = np.min([r_min + cutout_size, r_max]) + c_max = np.min([c_min + cutout_size, c_max]) + if return_coords: + row_2d, col_2d = np.mgrid[r_min:r_max, c_min:c_max] + return col_2d, row_2d, f[r_min:r_max, c_min:c_max] + return f[r_min:r_max, c_min:c_max] diff --git a/tests/test_ffimachine.py b/tests/test_ffimachine.py index e870a04..d9a216b 100644 --- a/tests/test_ffimachine.py +++ b/tests/test_ffimachine.py @@ -21,13 +21,14 @@ def test_ffi_from_file(): assert isinstance(ffi, Machine) # test attributes have the right shapes assert ffi.time.shape == (1,) - assert ffi.flux.shape == (1, 33812) - assert ffi.flux_2d.shape == (1, 180, 188) - assert ffi.flux_err.shape == (1, 33812) - assert ffi.column.shape == (33812,) - assert ffi.row.shape == (33812,) - assert ffi.ra.shape == (33812,) - assert ffi.dec.shape == (33812,) + img_shape = ffi.image_shape + assert ffi.flux.shape == (1, img_shape[0] * img_shape[1]) + assert ffi.flux_2d.shape == (1, *img_shape) + assert ffi.flux_err.shape == (1, img_shape[0] * img_shape[1]) + assert ffi.column.shape == (img_shape[0] * img_shape[1],) + assert ffi.row.shape == (img_shape[0] * img_shape[1],) + assert ffi.ra.shape == (img_shape[0] * img_shape[1],) + assert ffi.dec.shape == (img_shape[0] * img_shape[1],) assert ffi.sources.shape == (259, 13) diff --git a/tests/test_machine.py b/tests/test_machine.py index 5e37968..7c084f1 100644 --- a/tests/test_machine.py +++ b/tests/test_machine.py @@ -19,7 +19,7 @@ def test_create_delta_sparse_arrays(): machine = TPFMachine.from_TPFs(tpfs) # create numpy arrays - machine._create_delta_arrays() + # machine._create_delta_arrays() non_sparse_arr = machine.__dict__.copy() # check for main attrs shape @@ -44,12 +44,13 @@ def test_create_delta_sparse_arrays(): # manually mask numpy arrays to compare them vs sparse array dist_lim = 40 - mask = (np.abs(non_sparse_arr["dra"].value) <= dist_lim / 3600) & ( - np.abs(non_sparse_arr["ddec"].value) <= dist_lim / 3600 + mask = (np.abs(non_sparse_arr["dra"]) <= dist_lim / 3600) & ( + np.abs(non_sparse_arr["ddec"]) <= dist_lim / 3600 ) # create sparse arrays - machine._create_delta_sparse_arrays(dist_lim=dist_lim) + del machine.dra + machine._update_delta_sparse_arrays(dist_lim=dist_lim) sparse_arr = machine.__dict__.copy() assert sparse_arr["dra"].shape == non_sparse_arr["dra"].shape @@ -68,10 +69,10 @@ def test_create_delta_sparse_arrays(): assert 
sparse_arr["phi"].data.shape == (853,) # compare sparse array vs numpy array values - assert (non_sparse_arr["dra"][mask].value == sparse_arr["dra"].data).all() - assert (non_sparse_arr["ddec"][mask].value == sparse_arr["ddec"].data).all() - assert (non_sparse_arr["r"][mask].value == sparse_arr["r"].data).all() - assert (non_sparse_arr["phi"][mask].value == sparse_arr["phi"].data).all() + assert (non_sparse_arr["dra"][mask] == sparse_arr["dra"].data).all() + assert (non_sparse_arr["ddec"][mask] == sparse_arr["ddec"].data).all() + assert np.allclose(non_sparse_arr["r"][mask], sparse_arr["r"].data) + assert np.allclose(non_sparse_arr["phi"][mask], sparse_arr["phi"].data) @pytest.mark.remote_data