Source code for mojito.sampling

"""
Time and frequency grids
========================

This module provides helper classes to handle time and frequency sampling
attributes in Mojito L1 files.

.. autoclass:: UniformTimeSampling
    :members:

.. autoclass:: LogUniformFrequencySampling
    :members:
"""

import logging
from math import ceil, floor
from typing import Self

import numpy as np
from attrs import field, frozen
from attrs.validators import gt
from h5py import Group
from numpy.typing import NDArray

logger = logging.getLogger(__name__)



[docs]
@frozen(kw_only=True)
class UniformTimeSampling:
    """Helper class to handle sampling attributes in Mojito L1 files."""


[docs]
    @classmethod
    def from_h5_group(cls, group: Group) -> Self:
        """Create an instance from an HDF5 group.

        Parameters
        ----------
        group
            HDF5 group containing the sampling attributes.

        Returns
        -------
        TimeSampling
            Instance populated with the group's attributes.
        """
        t0_attr = group.attrs["t0"]
        dt_attr = group.attrs["dt"]
        size_attr = group.attrs["size"]

        assert isinstance(t0_attr, np.number)
        assert isinstance(dt_attr, np.number)
        assert isinstance(size_attr, np.integer)

        return cls(
            t0=float(t0_attr),
            dt=float(dt_attr),
            size=int(size_attr),
        )


    t0: float
    """Start time of the sampling [s]."""

    dt: float = field(validator=gt(0))
    """Time step of the sampling [s]."""

    size: int = field(validator=gt(0))
    """Number of samples in the time series."""

    @property
    def fs(self) -> float:
        """Sampling frequency [Hz]."""
        return 1.0 / self.dt

    @property
    def duration(self) -> float:
        """Duration of the time series [s].

        This is defined as ``size * dt``. Note that the last timestamp is
        not ``t0 + duration``, but ``t0 + (size - 1) * dt``.
        """
        return self.size * self.dt


[docs]
    def tmax(self, elapsed: bool = False) -> float:
        """Return the last timestamp.

        The last timestamp is defined as the last element of the array returned
        by :meth:`t`. Note that this is not ``t0 + duration``.

        If ``elapsed`` is False, it corresponds to ``t0 + (size - 1) * dt``. If
        ``elapsed`` is True, the time origin is ignored and the last timestamp
        is ``(size - 1) * dt``.

        Parameters
        ----------
        elapsed
            If True, ignore ``t0``.

        Returns
        -------
            Last timestamp [s].
        """
        tmax = (self.size - 1) * self.dt
        if not elapsed:
            tmax += self.t0
        return tmax



[docs]
    def t(
        self,
        idx: slice | None = None,
        *,
        elapsed: bool = False,
    ) -> NDArray:
        """Return the uniform array of times.

        The time grid is defined as ``t0 + i * dt`` for ``i`` in
        ``range(size)``.

        Use the ``slice`` parameter to return a sliced version of the time grid.
        This is useful for long datasets, as this will only allocate the
        selected timestamps in memory. For example:

        >>> sampling = UniformTimeSampling(t0=1.5, dt=0.5, size=10)
        >>> sampling.t(slice(2, 7, 2))
        array([2.5, 3.5, 4.5])

        If ``elapsed`` is True, the time grid is defined as ``i * dt`` for ``i``
        in ``range(size)``. This can be useful to preserve numerical precision.

        Parameters
        ----------
        idx
            Slice selecting which timestamps to return.
        elapsed
            If True, return elapsed time grid starting at 0.

        Returns
        -------
        NDArray
            Array of times [s]. Shape is ``(size,)`` if ``idx`` is None,
            otherwise it matches the selected slice length.
        """
        t0 = 0 if elapsed else self.t0
        if idx is None:
            indices = np.arange(self.size, dtype=float)
        else:
            start_idx, stop_idx, step_idx = idx.indices(self.size)
            indices = np.arange(start_idx, stop_idx, step_idx, dtype=float)
        return t0 + indices * self.dt



[docs]
    def slice_between(self, tmin: float | None, tmax: float | None) -> slice:
        r"""Return a slice that contains the interval [tmin, tmax].

        We guarantee that any time ``t`` such that ``tmin <= t <= tmax`` is
        included in the returned slice.

        .. code-block:: python

            sampling = UniformTimeSampling(t0=0.0, dt=0.1, size=100)
            time_array = sampling.t()

            # Pick a random time between 2.3s and 5.7s
            tmin = 2.3
            tmax = 5.7
            random_t = np.random.uniform(tmin, tmax)

            # Slice the time array
            time_slice = sampling.slice_between(tmin, tmax)
            selected_times = time_array[time_slice]

            # We guarantee that random_t is in selected_times
            assert random_t in selected_times


        .. warning::

           Because floating-point arithmetics is not exact, we extend the slice
           so that it will always contain ``tmin`` and ``tmax``. As a result,
           the returned slice can be larger than ``(tmax - tmin) / dt``. Note
           that it is at most ``2 * self.dt`` larger.

        Parameters
        ----------
        tmin
            Lower bound of the time range [s]. If None, the slice starts at the
            beginning of the time series.
        tmax
            Upper bound of the time range [s]. If None, the slice ends at the
            end of the time series.

        Returns
        -------
            A slice instance such that all times ``t`` with ``tmin <= t <=
            tmax`` are included in ``self.t()[slice]``.

        Raises
        ------
        ValueError
            If ``tmin < self.t0``, or if ``tmax > self.tmax()``.
        """

        if tmin is not None and tmin < self.t0:
            raise ValueError(
                "tmin must be larger or equal to self.t0"
                f"(got {tmin} while self.t0 is {self.t0})"
            )
        if tmax is not None and tmax > self.tmax():
            raise ValueError(
                "tmax must be larger or equal to self.tmax()"
                f"(got {tmin} while self.tmax() is {self.tmax()})"
            )

        if tmin is None:
            idx_min = None
        else:
            idx_min = floor((tmin - self.t0 - 1e-15 * self.dt) / self.dt)
            idx_min = max(0, idx_min)

        if tmax is None:
            idx_max = None
        else:
            idx_max = ceil((tmax - self.t0 + 1e-15 * self.dt) / self.dt) + 1
            idx_max = min(self.size, idx_max)

        return slice(idx_min, idx_max)





[docs]
@frozen(kw_only=True)
class LogUniformFrequencySampling:
    """Helper class to handle logarithmic frequency sampling in Mojito L1 files.

    Frequencies must be positive here.
    """


[docs]
    @classmethod
    def from_h5_group(cls, group: Group) -> Self:
        """Create an instance from an HDF5 group.

        Parameters
        ----------
        group
            HDF5 group containing the frequency sampling attributes.

        Returns
        -------
        LogUniformFrequencySampling
            Instance populated with the group's attributes.
        """
        fmin_attr = group.attrs["fmin"]
        fmax_attr = group.attrs["fmax"]
        size_attr = group.attrs["size"]

        assert isinstance(fmin_attr, np.number)
        assert isinstance(fmax_attr, np.number)
        assert isinstance(size_attr, np.integer)

        return cls(
            fmin=float(fmin_attr),
            fmax=float(fmax_attr),
            size=int(size_attr),
        )


    fmin: float = field(validator=gt(0))
    """Minimum frequency [Hz]."""

    fmax: float = field(validator=gt(0))
    """Maximum frequency [Hz]."""

    size: int = field(validator=gt(0))
    """Number of frequencies."""

    def __attrs_post_init__(self) -> None:
        """Post-initialization checks."""
        if self.fmax <= self.fmin:
            raise ValueError("fmax must be greater than fmin.")


[docs]
    def f(self, idx: slice | None = None) -> NDArray:
        r"""Return the log-uniform array of frequencies.

        The frequency grid is defined as :math:`f_i = 10^{F_i}` with :math:`F_i
        = \log_{10}(f_\mathrm{min}) + i \Delta \log_{10}(f)`, for :math:`i`
        ranging from ``0`` to ``size - 1`` and :math:`\Delta \log_{10}(f) =
        (\log_{10}(f_\mathrm{max}) - \log_{10}(f_\mathrm{min})) / (\text{size} -
        1)`.

        Use the ``idx`` parameter to return a sliced version of the frequency
        grid. This is useful for long datasets, as this will only allocate the
        selected frequencies in memory. For example:

        >>> sampling = LogUniformFrequencySampling(fmin=1.0, fmax=100.0, size=10)
        >>> sampling.f(slice(2, 7, 2))
        array([ 2.7825594 ,  7.74263683, 21.5443469 ])

        Parameters
        ----------
        idx
            Slice selecting which frequencies to return.

        Returns
        -------
        NDArray
            Array of frequencies [Hz]. Shape is ``(size,)`` if ``idx`` is
            None, otherwise it matches the selected slice length.
        """
        if idx is None:
            indices = np.arange(self.size, dtype=float)
        else:
            start_idx, stop_idx, step_idx = idx.indices(self.size)
            indices = np.arange(start_idx, stop_idx, step_idx, dtype=float)

        log_fmin = np.log10(self.fmin)
        if self.size == 1:
            log_f = np.full_like(indices, log_fmin)
        else:
            delta_log_f = (np.log10(self.fmax) - log_fmin) / (self.size - 1)
            log_f = log_fmin + indices * delta_log_f

        return np.power(10.0, log_f)