Source code for sourcefinder.stats

"""Generic utility routines for number handling and calculating
(specific) variances used by the TKP sourcefinder.

"""

import math
import numpy as np
from numba import njit, guvectorize, int32, float32
from sourcefinder.utils import newton_raphson_root_finder


@njit  # pragma: no cover
[docs] def erf(val): return math.erf(val)
@njit # pragma: no cover
[docs] def find_true_std(sigma, clipped_std, clip_limit): """ This function defines the transcendental equation 2.25 from Spreeuw's thesis, i.e. the root of this function is the true standard deviation, derived from a clipped standard deviation and a clipping limit. Parameters ---------- sigma : float The true standard deviation. clipped_std : float The standard deviation of the clipped data. clip_limit : float The clipping limit. Returns ------- float The value of the function for the given sigma. Notes ----- This function, together with its derivative should be input to a root finder algorithm; i.e. we are looking for the value of sigma such that this function returns zero. """ help1 = clip_limit / (sigma * np.sqrt(2)) help2 = np.sqrt(2 * np.pi) * erf(help1) return ( sigma**2 * (help2 - 2 * np.sqrt(2) * help1 * np.exp(-(help1**2))) - clipped_std**2 * help2 )
@njit # pragma: no cover
[docs] def indep_pixels(n, correlation_lengths): """Calculate the number of independent pixels given the total number of pixels and the correlation lengths. Parameters ---------- n : int The total number of pixels. correlation_lengths : tuple of float A tuple containing the long and short correlation lengths. Returns ------- float The number of independent pixels. """ corlengthlong, corlengthshort = correlation_lengths correlated_area = 0.25 * np.pi * corlengthlong * corlengthshort return n / correlated_area
@guvectorize( [(float32[:], int32[:], float32[:], float32[:])], "(k), () -> (), ()", target="parallel", )
[docs] def data_clipper_dynamic( flat_data, number_of_non_nan_elements, mean, std ): # pragma: no cover """Perform dynamic data clipping. Perform dynamic data clipping to calculate the mean and standard deviation of the background pixels in a subimage. This function was written to calculate two grids that together determine the statistics of the background noise of the image. The nodes of those grids are centered on the subimages. The guvectorize decorator allows for 3D input instead of the flattened subimage data. The first and second dimensions correspond to the desired grid dimensions, and the third dimension corresponds to the flattened subimage size. See utils.make_subimages for information on how suitable input for this function is generated. The parameters in this docstring apply to a single subimage. "dynamic" kappa * sigma clipping differs from classical sigma clipping in the sense that a bias correction is applied; sigma as derived from a clipped distribution is biased low. Parameters ---------- flat_data : numpy.ndarray The flattened array of subimage data. number_of_non_nan_elements : numpy.ndarray A single-element array containing the number of non-NaN elements in `flat_data`. mean : numpy.ndarray A single-element array to store the calculated mean of the background pixels of the subimage. std : numpy.ndarray A single-element array to store the calculated standard deviation of the background pixels of the subimage. Notes ----- This function uses an iterative approach to clip the data and calculate the mean and standard deviation of the background pixels. The median is used as a robust approximation of the mean, and the standard deviation is corrected for bias when clipping is applied. The SExtractor approach for estimating the mean in case of skewed vs. not-too-skewed distributions is followed. """ # In this context we use the terms nan and masked interchangeably, since we # expect every image value that is a nan to be masked. # The first number_of_non_nan_elements of flat_data will be not nans. # We need at least two non-nan elements to compute a standard deviation. # However, here we apply a stricter policy and require all subimage data to # be unmasked. Without this requirement, we cannot guarantee that the # subimage data used to calculate a grid node value is centered on that # node. if number_of_non_nan_elements[0] == flat_data.size: limit = 0 while True: # When there are sources, the median is a more robust # approximation of the mean of the background pixels than a plain # mean. Think of this approach as a mode approximation, since # background pixels should outnumber source pixels. mean[0] = np.median(flat_data) regular_std = np.std(flat_data) if limit: # The standard deviation of clipped data will be biased low, # correct for that. std[0], iterations = newton_raphson_root_finder( find_true_std, regular_std, 0, limit, 1e-8, 100, regular_std, limit, ) else: std[0] = regular_std limit = 2 * std[0] clipped_data = flat_data[flat_data < mean[0] + limit] clipped_data = clipped_data[clipped_data > mean[0] - limit] if clipped_data.size == flat_data.size: break flat_data = clipped_data plain_mean = np.mean(flat_data) # If the distribution of remaining pixel values is not too skewed, this # gives a better approximation of the mean of the background pixels. if np.fabs(plain_mean - mean[0]) / std[0] < 0.3: mean[0] = 2.5 * mean[0] - 1.5 * plain_mean else: mean[0] = 0 std[0] = 0