Source code for pythia.least_squares_sampler

import warnings
from typing import Callable

import numpy as np
from scipy.integrate import quad
from typing_extensions import deprecated

from . import basis as pt_basis
from .parameter import Parameter
from .sampler import (
    BetaSampler,
    ParameterSampler,
    ProductSampler,
    Sampler,
    UniformSampler,
    assign_sampler,
    get_maximum,
    rejection_sampling,
)


[docs]def owls_sampling_bound(
    dimension: int, stability_threshold: float = 0.5, failure_probability: float = 0.5
) -> int:
    r"""Minimum sample size for stable optimally weighted least squares approximation.

    For a (weighted) linear least squares approximation problem in an
    :math:`m`-dimensional linear space, let :math:`G` denote the estimated Gramian
    matrix with respect to an :math:`L^2`-orthonormal basis of this space.
    The approximation error is small when :math:`G` is close to the idenity matrix.
    This function returns :math:`n_0 \in \mathbb{N}` such that an optimal sample
    (cf. [1]_) of size :math:`n \ge n_0` satisfies
    .. math::
        \|G - I\|_2 \le \delta < 1
    with a probability of at least :math:`1 - p`.

    Parameters
    ----------
    dimension : int
        Dimension of the linear space (:math:`m \in \mathbb{N}`).
    stability_threshold : float, default=0.5
        Stability threshold that should be achieved (:math:`\delta \in (0, 1)`).
    failure_probability : float, default=0.5
        Failure probability with which the stability threshold is breached
        (:math:`p \in (0, 1)`).

    Returns
    -------
    int
        A sample size :math:`n` ensuring that :math:`\|G - I\|_2` exceeds the
        `stability_threshold` :math:`\delta` with a `failure_probability` of
        at most :math:`p`.

    Notes
    -----
    This function is derived from formula (3.1) in [1]_
    .. math::
        p
        = \mathbb{P}[\|I - G\|_2 > \delta]
        \le 2 m \exp\left(-\frac{c_\delta n}{m}\right) ,
    where :math:`m` is the `dimension`, :math:`\delta` is the `stability_threshold`.

    References
    ----------
    .. [1] A. Cohen, G. Migliorati. "Optimal weighted least-squares methods,"
           in The SMAI journal of computational mathematics, vol. 3, pp. 181-203, 2017
    """
    assert isinstance(dimension, int) and dimension > 0
    assert isinstance(stability_threshold, float) and 0 < stability_threshold < 1
    assert isinstance(failure_probability, float) and 0 < failure_probability < 1
    m, p, δ = dimension, failure_probability, stability_threshold
    c_δ = δ + (1 - δ) * np.log(1 - δ)
    return int(np.ceil(m / c_δ * (np.log(2 * m) - np.log(p))))


[docs]class OWLSUnivariateSampler(Sampler):
    r"""Sampler for univariate optimally distributed samples on given domain.

    Given a stochastic variable :math:`y\in\Gamma\subset\mathbb{R}`
    with :math:`y\sim\pi` and a finite subset
    :math:`\{P_j\}_{j=0}^{d-1}` of an orthonormal polynomial basis of
    :math:`L^2(\Gamma,\pi)`, the optimal weighted least-squares sampling
    distribution for a function
    :math:`u\in\operatorname{span}\{P_j\ \vert\ j=0,\dots,d-1 \}` reads

    .. math::
        \mathrm{d}\mu = w^{-1} \mathrm{d}\pi
        \qquad\mbox{with weight}\qquad
        w^{-1}(y) = \frac{1}{d}\sum_{j=0}^{d-1}\vert P_j(y)\vert^2.

    Parameters
    ----------
    domain : array_like
        Interval of support of distribution.

    Notes
    -----
    To generate samples from the weighted least-squares distribution rejection
    sampling is used. For certain basis functions it is possible to choose a
    well-suited trial sampler for the rejection sampling, which can be enabled
    via setting ``tsa=True``.

    See Also
    --------
    pythia.sampler.OWLSTensorSampler

    References
    ----------
    The optimal weighted least-squares sampling is based on the results in
    Cohen & Migliorati [1]_.

    .. [1] Cohen, A. and Migliorati, G.,
        “Optimal weighted least-squares methods”,
        SMAI Journal of Computational Mathematics 3, 181-203 (2017).
    """

    def __init__(self, param: Parameter, deg: int, tsa: bool = True) -> None:
        """Initiate OWLSUnivariateSampler object."""
        self.parameter = param
        self.deg = deg
        self._base_sampler = assign_sampler(param)
        self.domain = self._base_sampler.domain
        self._basis = pt_basis.univariate_basis([self.parameter], [self.deg])[0]
        self._tsa = tsa
        self._trial_sampler, self._bulk = self._compute_trial_sampler()
        assert self.domain.shape == (1, 2)
        # self._length = self.domain[0, 1] - self.domain[0, 0]

    @property
    def dimension(self) -> int:
        """Parameter dimension."""
        return self.domain.shape[0]

    @property
    def mass(self) -> float:
        """Mass of the PDF."""
        return 1

    @property
    def maximum(self) -> float:
        """Maximum value of the PDF."""
        if self.parameter.distribution == "uniform":
            # alternatively return pdf in domain[0, 1]
            maximum = self.pdf(self.domain[0, 0])
            assert isinstance(maximum, float)
            return maximum
        elif self.parameter.distribution == "normal":
            if self.deg % 2 == 0:
                assert self.parameter.mean is not None
                maximum = self.pdf(self.parameter.mean)
                assert isinstance(maximum, float)
                return maximum
            else:
                # Note: this can be improved
                return get_maximum(self.pdf, self.domain)
        elif self.parameter.distribution == "gamma":
            # Note: this is probably not going to work for domain = [a, inf]
            return get_maximum(self.pdf, self.domain)
        elif self.parameter.distribution == "beta":
            # Note: this can be improved
            return get_maximum(self.pdf, self.domain)
        return get_maximum(self.pdf, self.domain)

    @property
    def mean(self) -> float:
        """Mean value of the distribution."""
        mean, err = quad(
            lambda x: x * self.pdf(x), self.domain[0, 0], self.domain[0, 1]
        )
        if err > 1e-8:
            warnings.warn(f"quadrature error large: ({err})")
        return mean

    @property
    def cov(self) -> float:
        """(Co)Variance of the distribution."""
        moment, err = quad(
            lambda x: x**2 * self.pdf(x), self.domain[0, 0], self.domain[0, 1]
        )
        if err > 1e-8:
            warnings.warn(f"quadrature error large: ({err})")
        return moment - self.mean**2

    @property
    def var(self) -> float:
        """Variance of the distribution."""
        return self.cov

    @property
    def std(self) -> float:
        """Standard deviation of the distribution."""
        return np.sqrt(self.var)

[docs]    def weight(self, x: np.ndarray | float | int) -> np.ndarray:
        """Weights for the pdf.

        Parameters
        ----------
        x : np.ndarray
            Points the weight function is evaluated in.

        Returns
        -------
        w : array_like
            Weights of evaluation points `x`.
        """
        if isinstance(x, (float, int)):
            x = np.array(x).reshape(1)
        if x.ndim == 2 and x.shape[1] == 1:
            x = np.array(x).reshape(-1)
        assert x.ndim < 2
        b_eval = np.sum([np.abs(p(x)) ** 2 for p in self._basis], axis=0)
        return (self.deg + 1) / b_eval

[docs]    def pdf(self, x: np.ndarray | np.floating | float | int) -> np.ndarray:
        """Evaluate uniform PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of PDF evaluated in `x`.
        """
        x = np.asarray(x)
        return self._base_sampler.pdf(x) / self.weight(x)

[docs]    def log_pdf(self, x: np.ndarray | float | int) -> np.ndarray:
        """Evaluate uniform log-PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of log-PDF evaluated in `x`.
        """
        if isinstance(x, (float, int)):
            x = np.array(x).reshape(1)
        if x.ndim == 2 and x.shape[1] == 1:
            x = np.array(x).reshape(-1)
        return self._base_sampler.log_pdf(x) - np.log(self.weight(x))

[docs]    def grad_x_log_pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate gradient of uniform log-PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of gradient (vector valued) of log-PDF evaluated in `x`.
        """
        raise NotImplementedError

[docs]    def hess_x_log_pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate Hessian of uniform log-PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of Hessian (matrix valued) of log-PDF evaluated in `x`.
        """
        raise NotImplementedError

    def _compute_trial_sampler(self) -> tuple[Sampler, float]:
        """Trial sampler adaptation.

        .. note::
            TSA currently only available for uniform parameter distribution.

        Parameters
        ----------
        tsa : bool
            Adapt trial sampler or simply use uniform product sampler.

        Returns
        -------
        trial_sampler : pt.sampler.Sampler
            Trial sampler.
        bulk : array_like
            Domain estimate of the mass of the distribution.
        """
        trial_sampler: Sampler
        if self._tsa is True and self.parameter.distribution == "uniform":
            trial_sampler = BetaSampler(self.parameter.domain, 0.5, 0.5)
        else:
            trial_sampler = UniformSampler(self.parameter.domain)
        bulk = get_maximum(lambda x: self.pdf(x) / trial_sampler.pdf(x), self.domain)
        return trial_sampler, bulk

[docs]    def sample(self, shape: int | list | tuple | np.ndarray) -> np.ndarray:
        """Draw samples from weighted least-squares parameter distribution.

        Parameters
        ----------
        shape : array_like
            Shape of the samples.

        Returns
        -------
        :
            Random samples of specified shape.
        """
        if isinstance(shape, int):
            shape = (shape,)
        samples = rejection_sampling(
            self.pdf, self._trial_sampler, self._bulk, self.dimension, shape
        )
        return samples


[docs]class OWLSSampler(Sampler):
    r"""Weighted Least-Squares sampler.

    Given a stochastic variable :math:`y\in\Gamma\subset\mathbb{R}^M`
    with :math:`y\sim\pi`, a set of multiindices
    :math:`\Lambda\subset\mathbb{N}_0^M` and a finite subset
    :math:`\{P_\alpha\}_{\alpha\in\Lambda}` of an orthonormal polynomial
    basis of :math:`L^2(\Gamma,\pi)`, the optimal weighted least-squares
    sampling distribution for a function
    :math:`u\in\operatorname{span}\{P_\alpha\ \vert\ \alpha\in\Lambda\}`
    reads

    .. math::
        \mathrm{d}\mu = w^{-1} \mathrm{d}\pi
        \qquad\mbox{with weight}\qquad
        w^{-1}(y) = \frac{1}{|\Lambda|}\sum_{\alpha\in\Lambda} |P_\alpha(y)|^2,

    where :math:`\vert\Lambda\vert` denotes the number of elements in
    :math:`\Lambda`.

    Parameters
    ----------
    params : list of `pythia.parameter.Parameter`
        list of parameters.
    basis : list
        list of basis functions.
    tsa : bool, default=False
        Trial sampler adaptation. If True, a trial sampler is chosen on the
        distributions of parameters, if false a uniform trial sampler is used.

    Other Parameters
    ----------------
    trial_sampler : pythia.sampler.Sampler, default=None
        Trial sampler for rejection sampling. If `tsa` is true and either
        `trial_sampler` or `bulk` are `None`, the trial sampler is chosen
        automatically.
    bulk : float, defaul=None
        Scaling for trial sampler. If `tsa` is true and either
        `trial_sampler` or `bulk` are `None`, the trial sampler is chosen
        automatically.

    Notes
    -----
    To generate samples from the weighted least-squares distribution rejection
    sampling is used. For certain basis functions it is possible to choose a
    well-suited trial sampler for the rejection sampling, which can be enabled
    via setting ``tsa=True``.

    See Also
    --------
    pythia.sampler.OWLSUnivariateSampler, pythia.sampler.OWLSTensorSampler

    References
    ----------
    The optimal weighted least-squares sampling is based on the results of
    Cohen & Migliorati [1]_.
    """

    def __init__(
        self,
        params: list[Parameter],
        basis: list[Callable],
        tsa: bool = True,
        trial_sampler: Sampler | None = None,
        bulk: float | None = None,
    ) -> None:
        """Initiate OWLSSampler object."""
        self.parameter = params
        self._param_sampler = ParameterSampler(self.parameter)
        self.domain = self._param_sampler.domain
        self.basis = basis
        self._tsa = tsa
        if trial_sampler is None or bulk is None:
            self._trial_sampler, self._bulk = self._compute_trial_sampler()
        else:
            self._trial_sampler = trial_sampler
            self._bulk = bulk

    @property
    def dimension(self) -> int:
        """Dimension of the parameters."""
        return self._param_sampler.dimension

    @property
    def mass(self) -> float:
        """Mass of the PDF."""
        return 1

    @property
    def maximum(self) -> float:
        """Maximum value of the PDF."""
        return get_maximum(self.pdf, self.domain)

    @property
    def mean(self) -> np.ndarray:
        """Mean of the PDF."""
        raise NotImplementedError

    @property
    def cov(self) -> np.ndarray:
        """Covariance of the PDF."""
        raise NotImplementedError

[docs]    def weight(self, x: np.ndarray) -> np.ndarray:
        """Weights for the PDF.

        Parameters
        ----------
        x : array_like
            Points the weight function is evaluated in.

        Returns
        -------
        :
            weights of evaluation points `x`.
        """
        km = np.sum([abs(p(x)) ** 2 for p in self.basis], axis=0)
        return len(self.basis) / km

[docs]    def pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of PDF evaluated in `x`.
        """
        assert x.shape[-1] == self.dimension
        return self._param_sampler.pdf(x) / self.weight(x)

[docs]    def log_pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate log-PDF.

        The log-PDF is given by the sum of the univariate log-PDFs.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of log-PDF evaluated in `x`.
        """
        assert x.shape[-1] == self.dimension
        return self._param_sampler.log_pdf(x) - np.log(self.weight(x))

[docs]    def grad_x_log_pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate gradient of log-PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of gradient (vector valued) of log-PDF evaluated in `x`.
        """
        raise NotImplementedError

[docs]    def hess_x_log_pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate Hessian of log-PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of Hessian (matrix valued) of log-PDF evaluated in `x`.
        """
        raise NotImplementedError

    def _compute_trial_sampler(self) -> tuple[Sampler, float]:
        """Trial sampler adaptation.

        Parameters
        ----------
        tsa : bool
            Adapt trial sampler or simply use uniform product sampler.

        Returns
        -------
        trial_sampler : pt.sampler.Sampler
            Trial sampler.
        bulk : array_like
            Domain estimate of the mass of the distribution.
        """
        sampler_list: list[Sampler] = []
        for param in self.parameter:
            if self._tsa is True and param.distribution == "uniform":
                sampler_list.append(BetaSampler(param.domain, 0.5, 0.5))
            else:
                sampler_list.append(UniformSampler(param.domain))
        trial_sampler = ProductSampler(sampler_list)
        bulk = get_maximum(lambda x: self.pdf(x) / trial_sampler.pdf(x), self.domain)
        return trial_sampler, bulk

[docs]    def sample(self, shape: int | list | tuple | np.ndarray) -> np.ndarray:
        """Draw samples from distribution.

        Parameters
        ----------
        shape : array_like
            Shape of the samples.

        Returns
        -------
        :
            Random samples of specified shape.
        """
        if isinstance(shape, int):
            shape = (shape,)
        samples = rejection_sampling(
            self.pdf, self._trial_sampler, self._bulk, self.dimension, shape
        )
        return samples


[docs]class OWLSTensorSampler(Sampler):
    r"""Weighted least-squares sampler for tensor multivariate basis.

    Given a stochastic variable :math:`y\in\Gamma\subset\mathbb{R}^M`
    with :math:`y\sim\pi=\prod_{m=1}^M\pi_m` for one dimensional densities
    :math:`\pi_m`, a tensor set of multiindices
    :math:`\Lambda=[d_1]\times\dots\times[d_M]\subset\mathbb{N}_0^M`,
    where :math:`[d_m]=\{0,\dots,d_m-1\}`, and a finite subset
    :math:`\{P_\alpha\}_{\alpha\in\Lambda}` of an orthonormal product
    polynomial basis of :math:`L^2(\Gamma,\pi)`, i.e.,
    :math:`P_\alpha(y) = \prod_{m=1}^M P_{\alpha_m}(y_m)`, the optimal
    weighted least-squares sampling distribution for a function
    :math:`u\in\operatorname{span}\{P_\alpha\ \vert\ \alpha\in\Lambda\}`
    reads

    .. math::
        \mathrm{d}\mu = w^{-1} \mathrm{d}\pi
        \qquad\mbox{with weight}\qquad
        w^{-1}(y) = \prod_{m=1}^M
                        \frac{1}{d_m} \sum_{\alpha_m\in[d_m]} |P_{\alpha_m}(y_m)|^2.

    Parameters
    ----------
    params : list of `pythia.parameter.Parameter`
        Parameter list.
    deg : list of int
        Polynomial degree of each component (same for all).
    tsa : bool, default=True
        Trial sampler adaptation. If True, a trial sampler is chosen on the
        distributions of parameters, if false a uniform trial sampler is
        used.

    Notes
    -----
    To generate samples from the weighted least-squares distribution rejection
    sampling is used. For certain basis functions it is possible to choose a
    well-suited trial sampler for the rejection sampling, which can be enabled
    via setting ``tsa=True``.

    See Also
    --------
    pythia.sampler.OWLSUnivariateSampler

    References
    ----------
    The optimal weighted least-squares sampling is based on the results in
    Cohen & Migliorati [1]_.
    """

    def __init__(
        self, params: list[Parameter], deg: list[int], tsa: bool = True
    ) -> None:
        """Initiate OWLSTensorSampler object."""
        self.parameter = params
        self.deg = deg
        self.domain = np.array([param.domain for param in self.parameter])
        self._tsa = tsa
        # self._uBasis = pt_basis.univariate_basis(self.parameter, self.deg)
        # self._univariate_samplers = [assign_sampler(param)
        #                              for param in self.parameter]
        self._univariate_samplers = [
            OWLSUnivariateSampler(p, d, self._tsa)
            for (p, d) in zip(self.parameter, self.deg)
        ]
        self._product_sampler = ProductSampler(self._univariate_samplers)

    @property
    def dimension(self) -> int:
        """Dimension of the parameters."""
        return self._product_sampler.dimension

    @property
    def mass(self) -> float:
        """Mass of the PDF."""
        return self._product_sampler.mass

    @property
    def maximum(self) -> float:
        """Maximum value of the PDF."""
        return self._product_sampler.maximum

    @property
    def mean(self) -> np.ndarray:
        """Mean of the PDF."""
        return self._product_sampler.mean

    @property
    def cov(self) -> np.ndarray:
        """Covariance of the PDF."""
        return self._product_sampler.cov

[docs]    def weight(self, x: np.ndarray) -> np.ndarray:
        """Weights for the PDF.

        Parameters
        ----------
        x : array_like
            Points the weight function is evaluated in.

        Returns
        -------
        :
            Weights of evaluation points `x`.
        """
        assert x.shape[-1] == self.dimension
        weights = [s.weight for s in self._univariate_samplers]
        val = np.array([weights[jj](x[..., jj]) for jj in range(self.dimension)])
        return np.prod(val, axis=0)

[docs]    def pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of PDF evaluated in `x`.
        """
        return self._product_sampler.pdf(x)

[docs]    def log_pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate log-PDF.

        The log-PDF is given by the sum of the univariate log-PDFs.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of log-PDF evaluated in `x`.
        """
        return self._product_sampler.log_pdf(x)

[docs]    def grad_x_log_pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate gradient of log-PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of gradient (vector valued) of log-PDF evaluated in `x`.
        """
        return self._product_sampler.grad_x_log_pdf(x)

[docs]    def hess_x_log_pdf(self, x: np.ndarray) -> np.ndarray:
        """Evaluate Hessian of log-PDF.

        Parameters
        ----------
        x : array_like
            Evaluation points.

        Returns
        -------
        :
            Values of Hessian (matrix valued) of log-PDF evaluated in `x`.
        """
        return self._product_sampler.hess_x_log_pdf(x)

[docs]    def sample(self, shape: int | list | tuple | np.ndarray) -> np.ndarray:
        """Draw samples from distribution.

        Parameters
        ----------
        shape : array_like
            Shape of the samples.

        Returns
        -------
        :
            Random samples of specified shape.
        """
        return self._product_sampler.sample(shape)


[docs]@deprecated("WLSUnivariateSampler has been renamed to OWLSUnivariateSampler.")
class WLSUnivariateSampler(OWLSUnivariateSampler):
    pass


[docs]@deprecated("WLSTensorSampler has been renamed to OWLSTensorSampler.")
class WLSTensorSampler(OWLSTensorSampler):
    pass


[docs]@deprecated("WLSSampler has been renamed to OWLSSampler.")
class WLSSampler(OWLSSampler):
    pass