Source code for sourcefinder.config

from collections import defaultdict
from dataclasses import astuple
from dataclasses import dataclass
from dataclasses import field
from dataclasses import is_dataclass
from pathlib import Path
import sys

if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib

from types import UnionType
from typing import get_args
from typing import get_origin
from typing import get_type_hints
from typing import Container
from typing import Type
from typing import TypeVar
from warnings import warn

from sourcefinder.utility.sourceparams import SourceParams, file_fields


[docs]
T = TypeVar("T")




[docs]
def _is_dataclass(_type: Type[T], /) -> bool:
    """Remove ``TypeGuard`` from is_dataclass.

    see: https://github.com/python/mypy/issues/14941

    """
    return is_dataclass(_type)



# map of types that maybe converted to match the expected type

[docs]
_compat_types: defaultdict[type, set[type]] = defaultdict(set, {int: {float}})




[docs]
def assert_t(key: str, value, *types: type):
    """Assert value is of one of the types

    ``key`` is the TOML configuration key the value is associated to.
    It is used to generate a meaningful error message.

    """
    assert len(types) > 0, "need at least one type to assert"
    msg = f"{key}: type({value!r}) "
    if len(types) > 1:
        msg += f"∉ {{{', '.join(map(str, types))}}}"
    else:
        msg += f"!= {types[0]}"

    try:
        assert isinstance(value, types), msg
    except AssertionError:
        # NOTE: check if types are compatible
        if not _compat_types[type(value)].intersection(types):
            raise




[docs]
def validate_nested(key: str, value, origin_t, args):
    """Validate nested types allowed in TOML

    ``key`` is the TOML configuration key being validated.  ``value``
    should be of type ``origin_t[args]``.  It is passed to this
    function separately to avoid recomputing the type again.

    When the type is a ``list``, the value is tested recursively.  On
    recursive calls, the list index is appended to the key.  For
    ``dict``-s, iterate over all key-value pairs and validated.

    """
    # NOTE: only support TOML types
    if issubclass(origin_t, list):
        assert_t(key, value, list)
        # NOTE: unspecified type => Any; can't check
        if not args:
            return
        for i, v in enumerate(value):
            validate_types(f"{key}[{i}]", v, args[0])
    elif issubclass(origin_t, dict):
        assert_t(key, value, dict)
        for k, v in value.items():
            validate_types(f"{key}[{k!r}]", v, args[1])
    else:
        warn(f"{key}: unsupported type {origin_t[args]}, cannot validate")




[docs]
def validate_types(key: str, value, type_: type):
    """Validate types, dispatch on generic or POD types

    ``key`` is the TOML configuration key the value is associated to.
    It is used to generate a meaningful error message.

    """
    match get_origin(type_):
        case type() as origin_t if issubclass(origin_t, Container):
            validate_nested(key, value, origin_t, get_args(type_))
        case type() as origin_t if issubclass(origin_t, UnionType):
            assert_t(key, value, *get_args(type_))
        case type():
            warn(f"{key}: unsupported type {type_}, cannot validate")
        case None:
            # NOTE: plain old data types
            assert_t(key, value, type_)



@dataclass(frozen=True)

[docs]
class _Validate:
    def __post_init__(self):
        for (key, type_), val in zip(
            get_type_hints(self).items(), astuple(self)
        ):
            validate_types(key, val, type_)




[docs]
_structuring_element = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]




[docs]
_source_params = [p.value for p in SourceParams.__members__.values()]



[docs]
_source_params_file = [SourceParams[field].value for field in file_fields]



@dataclass(frozen=True)

[docs]
class ImgConf(_Validate):
    """Configuration that should cover all the specifications for processing the image."""


[docs]
    interpolate_order: int = 1

    """Order of interpolation to use for the background mean and
    background standard deviation (rms) maps (e.g. 1 for linear)

    """


[docs]
    median_filter: int = 0

    """Size of the median filter to apply to background and RMS grids
    prior to interpolating. This is used to discard outliers. Use 0 to
    disable.

    """


[docs]
    mf_threshold: int = 0

    """Threshold (Jy/beam) used with the median filter if
    median_filter is non-zero. This is used to only discard outliers
    (i.e. extreme background mean or rms node values) beyond a certain
    threshold. Use 0 to disable.

    """


[docs]
    rms_filter: float = 0.001

    """Any interpolated background standard deviation (rms) value
    should be above this threshold times the median of all background
    standard deviation (rms) node values. This is used to avoid
    picking up sources towards the edges of the image where the values
    of the background rms map may be the result of poor interpolation,
    i.e. are the result of extrapolation rather than
    interpolation. Use 0 to disable.

    """


[docs]
    deblend_mincont: float = 0.005

    """Minimum flux density fraction (relative to the original,
    i.e. unblended, island) required for a subisland to be considered
    a valid deblended component.

    """


[docs]
    structuring_element: list[list[int]] = field(
        default_factory=lambda: _structuring_element
    )

    """The "structuring element" defines island connectivity as in
    "4-connectivity" and "8-connectivity". These two are the only
    reasonable choices, since the structuring element must be
    centrosymmetric.  The structuring element is applied in
    scipy.ndimage.label, so check its documentation for some
    background on its use.

    """


[docs]
    vectorized: bool = True

    """Measure sources in a vectorized way. Expect peak spectral
    brightnesses with a lower bias (downwards) than for Gaussian fits
    (also downwards), but with a higher bias (upwards for both) for
    the elliptical axes.

    """


[docs]
    nr_threads: int | None = None

    """The number of threads used to parallelize Gaussian fits to detected 
    sources.
    Note: this does not change numba's 'num threads' for parallel numba operations.
    """


[docs]
    margin: int = 0

    """Margin in pixels to ignore near the edge of the image, i.e.
    sources within this margin will not be detected."""


[docs]
    radius: float = 0.0

    """Radius in pixels (from image center) considered valid, i.e. sources
    beyond this radius will not be detected.

    """


[docs]
    back_size_x: int | None = None

    """Subimage size for estimation of background node values (X
    direction). The nodes are centred on the subimages.

    """


[docs]
    back_size_y: int | None = None

    """Subimage size for estimation of background node values (Y
    direction). The nodes are centred on the subimages.

    """


[docs]
    grid: int | None = 64

    """Background subimage size used as fallback for back_size_x and
    back_size_y. If both are not set, this implies
    back_size_x=backsize_y=grid, i.e. the subimages are squares.

    """


[docs]
    eps_ra: float = 0.0

    """Calibration uncertainty in right ascension (degrees), see
    equation 27a of the NVSS paper.

    """


[docs]
    eps_dec: float = 0.0

    """Calibration uncertainty in declination (degrees), see equation
    27b of the NVSS paper.

    """


[docs]
    clean_bias: float = 0.0

    """Clean bias to subtract from the peak brightnesses (Jy/beam), see
    parapagraph 5.2.5 and equation 34 of the NVSS paper.

    """


[docs]
    clean_bias_error: float = 0.0

    """1-sigma uncertainty in clean bias (Jy/beam), see parapagraph 5.2.5 and
    equation 37 of the NVSS paper.

    """


[docs]
    frac_flux_cal_error: float = 0.0

    """Intensity-proportional calibration uncertainty, see paragraph 5.2.5 and 
    equation 37 of the NVSS paper.

    """


[docs]
    alpha_maj1: float = 2.5

    """First exponent for scaling errors along the fitted major 
    axis, see equation 26 and paragraph 5.2.3 of the NVSS paper and 
    equation 41 and paragraph 3 of Condon's (1997) "Errors in Elliptical 
    Gaussian Fits".

    """


[docs]
    alpha_maj2: float = 0.5

    """Second exponent for scaling errors along the fitted major 
    axis, see equation 26 and paragraph 5.2.3 of the NVSS paper and
    equation 41 and paragraph 3 of Condon's (1997) "Errors in Elliptical
    Gaussian Fits".

    """


[docs]
    alpha_min1: float = 0.5

    """First exponent for scaling errors along the fitted minor 
    axis and for scaling errors in the position angle, see equation 26 and 
    paragraph 5.2.3 of the NVSS paper and equation 41 and paragraph 3 of 
    Condon's (1997) "Errors in Elliptical Gaussian Fits".

    """


[docs]
    alpha_min2: float = 2.5

    """Second exponent for scaling errors along the fitted minor 
    axis and for scaling errors in the position angle, see equation 26 and 
    paragraph 5.2.3 of the NVSS paper and equation 41 and paragraph 3 of 
    Condon's (1997) "Errors in Elliptical Gaussian Fits".

    """


[docs]
    alpha_brightness1: float = 1.5

    """First exponent for scaling errors in peak brightness, see
    equation 26 and paragraph 5.2.5 of the NVSS paper and equation 41
    and paragraph 3 of Condon's (1997) "Errors in Elliptical Gaussian
    Fits".

    """


[docs]
    alpha_brightness2: float = 1.5

    """Second exponent for scaling errors in peak brightness, see
    equation 26 and paragraph 5.2.5 of the NVSS paper and equation 41
    and paragraph 3 of Condon's (1997) "Errors in Elliptical Gaussian
    Fits".

    """


[docs]
    detection_thr: float = 10.0

    """Detection threshold as multiple of the background standard
    deviation (rms) map, after the background mean values have been
    subtracted from the image.

    """


[docs]
    analysis_thr: float = 3.0

    """Analysis threshold as multiple of the background standard
    deviation (rms) map, after the background mean values have been
    subtracted from the image.

    """


[docs]
    fdr: bool = False

    """Use False Detection Rate (FDR) algorithm for determining
    detection threshold.

    """


[docs]
    alpha: float = 1e-2

    """FDR alpha value (float, default 0.01) that sets an upper limit
    on the fraction of pixels erroneously detected as source pixels,
    relative to all source pixels.  This requirement should be met
    when averaged over a large ensemble of images, but problems were
    encountered with alpha as low as 0.001, see paragraph 3.6 of
    Spreeuw's thesis.

    """


[docs]
    deblend_nthresh: int = 0

    """Number of deblending subthresholds; 0 to disable."""


[docs]
    bmaj: float | None = None

    """Set beam: Major axis of restoring beam (degrees)."""


[docs]
    bmin: float | None = None

    """Set beam: Minor axis of restoring beam (degrees)."""


[docs]
    bpa: float | None = None

    """Set beam: Restoring beam position angle (degrees)."""


[docs]
    force_beam: bool = False

    """Force source shape to align restoring beam shape (bmaj, bmin,
    bpa) for Gauss fits and vetorized source measurement, i.e. when
    vectorized=True (as of 2025-06-13: upcoming, issue #131).

    """


[docs]
    detection_image: str | None = None

    """Path to detection map. PySE will identify sources and the
    positions of pixels which comprise them on the detection image,
    but then use the corresponding pixels on the target images to
    perform measurements. Of course, the detection image and the
    target image(s) must have the same pixel dimensions. Note that
    only a single detection image may be specified, and the same
    pixels are then used on all target images. Note further that this
    detection-image option is incompatible with --fdr

    """


[docs]
    fixed_posns: str | None = None

    """JSON __list__ of RA, Dec pairs of coordinates to measure
    sources at (disables blind extraction and vectorized source
    measurements).

    """


[docs]
    fixed_posns_file: str | None = None

    """Path to JSON file with RA, Dec pairs of coordinates to measure
    sources at (disables blind extraction and vectorized source
    measurements).

    """


[docs]
    ffbox: float = 3.0

    """When fitting to a fixed position, a square “box” of pixels is
    chosen around the requested position, and the optimization
    procedure allows the source position to vary within that box. The
    size of the box may be changed with this option. Note that this
    parameter is given in units of the major axis of the beam in
    pixels.

    """


[docs]
    ew_sys_err: float = 0.0

    """Systematic error in east-west direction, see paragraph 5.2.3
    of the NVSS paper. Note that this parameter is currently not
    applied in PySE, because it should be considered a final step
    before entering source parameters in a catalog, i.e. it is simply
    returned to allow for systematic positional offset cf. the
    NVSS. Therefore, its unit (degrees, arcseconds) is up to the user.

    """


[docs]
    ns_sys_err: float = 0.0

    """Systematic error in north-south direction, see paragraph 5.2.3
    of the NVSS paper. Note that this parameter is currently not
    applied in PySE, because it should be considered a final step
    before entering source parameters in a catalog, i.e. it is simply
    returned to allow for systematic positional offset cf. the
    NVSS. Therefore, its unit (degrees, arcseconds) is up to the user.

    """


[docs]
    remove_edge_sources: bool = True

    """When source pixels - with values above the analysis threshold - 
    connect with the edge of a map or with masked pixels, do not measure the 
    source properties. Consequently, the parameters of this source will not be 
    returned. The idea here is that, when source pixels are adjacent to 
    edges or masked pixels, we'll likely be missing some pixels and any 
    source measurement will be compromised.

    """



@dataclass(frozen=True)

[docs]
class ExportSettings(_Validate):
    """Selection of output, related to detected sources and/or intermediate
    image processing products"""


[docs]
    output_dir: str = "."

    """Directory in which to write the output files."""


[docs]
    file_type: str = "csv"

    """Output file type (default: csv)."""


[docs]
    skymodel: bool = False

    """Generate sky model."""


[docs]
    csv: bool = False

    """Generate CSV text file (e.g., for TopCat)."""


[docs]
    regions: bool = False

    """Generate DS9 region file(s)."""


[docs]
    rmsmap: bool = False

    """Generate RMS map."""


[docs]
    sigmap: bool = False

    """Generate significance map."""


[docs]
    residuals: bool = False

    """Generate residual maps."""


[docs]
    islands: bool = False

    """Generate island maps."""


[docs]
    pandas_df: bool = True

    """ If True, the measured and derived source parameters will be returned 
    as a Pandas DataFrame. If false, they will be returned as a
    `utility.containers.ExtractionResults` object.
    
    """


[docs]
    source_params: list[str] = field(default_factory=lambda: _source_params)

    """Collect all possible source parameters."""


[docs]
    source_params_file: list[str] = field(
        default_factory=lambda: _source_params_file
    )

    """ Source parameters to include in a file for storage."""



@dataclass(frozen=True)

[docs]
class Conf:

[docs]
    image: ImgConf


[docs]
    export: ExportSettings


    def __post_init__(self):  # noqa: D105
        for key, field_t in get_type_hints(self).items():
            value = getattr(self, key)
            if _is_dataclass(field_t) and isinstance(value, dict):
                # NOTE: have to do it like this since inherited
                # dataclasses are frozen
                super().__setattr__(key, field_t(**value))




[docs]
def normalize_none_values(val):
    if isinstance(val, dict):
        return {k: normalize_none_values(v) for k, v in val.items()}
    elif isinstance(val, list):
        return [normalize_none_values(v) for v in val]
    elif isinstance(val, str) and val.strip().lower() == "none":
        return None
    else:
        return val




[docs]
def read_conf(path: str | Path):
    if path is None:
        data = {"tool": {"pyse": {"image": {}, "export": {}}}}
    else:
        data_raw = tomllib.loads(Path(path).read_text())
        data = normalize_none_values(data_raw)

    conf = data.get("tool", {}).get("pyse", {})
    if not conf:
        match data:
            case {"tool": {"pyse": dict(), **_rest1}, **_rest2}:
                raise KeyError("tool.pyse: empty section in config file")
            case {"tool": dict(), **_rest}:
                raise KeyError(
                    "tool.pyse: section for PySE missing in config file"
                )
            case _:
                raise KeyError(
                    "tool: top-level section missing in config file"
                )
    return Conf(**conf)