Source code for sourcefinder.stats

"""Generic utility routines for number handling and calculating
(specific) variances used by the TKP sourcefinder.

"""

import math
import numpy as np
from numba import njit, guvectorize, int32, float32
from sourcefinder.utils import newton_raphson_root_finder


@njit  # pragma: no cover

[docs]
def erf(val):
    return math.erf(val)



@njit  # pragma: no cover

[docs]
def find_true_std(sigma, clipped_std, clip_limit):
    """
    This function defines the transcendental equation 2.25 from Spreeuw's
    thesis, i.e. the root of this function is the true standard deviation,
    derived from a clipped standard deviation and a clipping limit.

    Parameters
    ----------
    sigma : float
        The true standard deviation.
    clipped_std : float
        The standard deviation of the clipped data.
    clip_limit : float
        The clipping limit.

    Returns
    -------
    float
        The value of the function for the given sigma.

    Notes
    -----
    This function, together with its derivative should be input to a root
    finder algorithm; i.e. we are looking for the value of sigma such that
    this function returns zero.

    """
    help1 = clip_limit / (sigma * np.sqrt(2))
    help2 = np.sqrt(2 * np.pi) * erf(help1)
    return (
        sigma**2 * (help2 - 2 * np.sqrt(2) * help1 * np.exp(-(help1**2)))
        - clipped_std**2 * help2
    )



@njit  # pragma: no cover

[docs]
def indep_pixels(n, correlation_lengths):
    """Calculate the number of independent pixels given the total
    number of pixels and the correlation lengths.

    Parameters
    ----------
    n : int
        The total number of pixels.
    correlation_lengths : tuple of float
        A tuple containing the long and short correlation lengths.

    Returns
    -------
    float
        The number of independent pixels.

    """
    corlengthlong, corlengthshort = correlation_lengths
    correlated_area = 0.25 * np.pi * corlengthlong * corlengthshort
    return n / correlated_area



@guvectorize(
    [(float32[:], int32[:], float32[:], float32[:])],
    "(k), () -> (), ()",
    target="parallel",
)

[docs]
def data_clipper_dynamic(
    flat_data, number_of_non_nan_elements, mean, std
):  # pragma: no cover
    """Perform dynamic data clipping.

    Perform dynamic data clipping to calculate the mean and standard
    deviation of the background pixels in a subimage. This function
    was written to calculate two grids that together determine the
    statistics of the background noise of the image. The nodes of
    those grids are centered on the subimages.  The guvectorize
    decorator allows for 3D input instead of the flattened subimage
    data. The first and second dimensions correspond to the desired
    grid dimensions, and the third dimension corresponds to the
    flattened subimage size. See utils.make_subimages for information
    on how suitable input for this function is generated.  The
    parameters in this docstring apply to a single subimage.
    "dynamic" kappa * sigma clipping differs from classical sigma
    clipping in the sense that a bias correction is applied; sigma as
    derived from a clipped distribution is biased low.

    Parameters
    ----------
    flat_data : numpy.ndarray
        The flattened array of subimage data.
    number_of_non_nan_elements : numpy.ndarray
        A single-element array containing the number of non-NaN elements in
        `flat_data`.
    mean : numpy.ndarray
        A single-element array to store the calculated mean of the background
        pixels of the subimage.
    std : numpy.ndarray
        A single-element array to store the calculated standard deviation of
        the background pixels of the subimage.

    Notes
    -----
    This function uses an iterative approach to clip the data and calculate the
    mean and standard deviation of the background pixels. The median is used as
    a robust approximation of the mean, and the standard deviation is corrected
    for bias when clipping is applied. The SExtractor approach for estimating
    the mean in case of skewed vs. not-too-skewed distributions is followed.

    """
    # In this context we use the terms nan and masked interchangeably, since we
    # expect every image value that is a nan to be masked.
    # The first number_of_non_nan_elements of flat_data will be not nans.
    # We need at least two non-nan elements to compute a standard deviation.
    # However, here we apply a stricter policy and require all subimage data to
    # be unmasked. Without this requirement, we cannot guarantee that the
    # subimage data used to calculate a grid node value is centered on that
    # node.
    if number_of_non_nan_elements[0] == flat_data.size:
        limit = 0
        while True:
            # When there are sources, the median is a more robust
            # approximation of the mean of the background pixels than a plain
            # mean. Think of this approach as a mode approximation, since
            # background pixels should outnumber source pixels.
            mean[0] = np.median(flat_data)
            regular_std = np.std(flat_data)

            if limit:
                # The standard deviation of clipped data will be biased low,
                # correct for that.
                std[0], iterations = newton_raphson_root_finder(
                    find_true_std,
                    regular_std,
                    0,
                    limit,
                    1e-8,
                    100,
                    regular_std,
                    limit,
                )
            else:
                std[0] = regular_std

            limit = 2 * std[0]
            clipped_data = flat_data[flat_data < mean[0] + limit]
            clipped_data = clipped_data[clipped_data > mean[0] - limit]
            if clipped_data.size == flat_data.size:
                break
            flat_data = clipped_data

        plain_mean = np.mean(flat_data)
        # If the distribution of remaining pixel values is not too skewed, this
        # gives a better approximation of the mean of the background pixels.
        if np.fabs(plain_mean - mean[0]) / std[0] < 0.3:
            mean[0] = 2.5 * mean[0] - 1.5 * plain_mean
    else:
        mean[0] = 0
        std[0] = 0