Source code for pythia.least_squares_sampler

import warnings
from typing import Callable

import numpy as np
from scipy.integrate import quad
from typing_extensions import deprecated

from . import basis as pt_basis
from .parameter import Parameter
from .sampler import (
    BetaSampler,
    ParameterSampler,
    ProductSampler,
    Sampler,
    UniformSampler,
    assign_sampler,
    get_maximum,
    rejection_sampling,
)


[docs]def owls_sampling_bound( dimension: int, stability_threshold: float = 0.5, failure_probability: float = 0.5 ) -> int: r"""Minimum sample size for stable optimally weighted least squares approximation. For a (weighted) linear least squares approximation problem in an :math:`m`-dimensional linear space, let :math:`G` denote the estimated Gramian matrix with respect to an :math:`L^2`-orthonormal basis of this space. The approximation error is small when :math:`G` is close to the idenity matrix. This function returns :math:`n_0 \in \mathbb{N}` such that an optimal sample (cf. [1]_) of size :math:`n \ge n_0` satisfies .. math:: \|G - I\|_2 \le \delta < 1 with a probability of at least :math:`1 - p`. Parameters ---------- dimension : int Dimension of the linear space (:math:`m \in \mathbb{N}`). stability_threshold : float, default=0.5 Stability threshold that should be achieved (:math:`\delta \in (0, 1)`). failure_probability : float, default=0.5 Failure probability with which the stability threshold is breached (:math:`p \in (0, 1)`). Returns ------- int A sample size :math:`n` ensuring that :math:`\|G - I\|_2` exceeds the `stability_threshold` :math:`\delta` with a `failure_probability` of at most :math:`p`. Notes ----- This function is derived from formula (3.1) in [1]_ .. math:: p = \mathbb{P}[\|I - G\|_2 > \delta] \le 2 m \exp\left(-\frac{c_\delta n}{m}\right) , where :math:`m` is the `dimension`, :math:`\delta` is the `stability_threshold`. References ---------- .. [1] A. Cohen, G. Migliorati. "Optimal weighted least-squares methods," in The SMAI journal of computational mathematics, vol. 3, pp. 181-203, 2017 """ assert isinstance(dimension, int) and dimension > 0 assert isinstance(stability_threshold, float) and 0 < stability_threshold < 1 assert isinstance(failure_probability, float) and 0 < failure_probability < 1 m, p, δ = dimension, failure_probability, stability_threshold c_δ = δ + (1 - δ) * np.log(1 - δ) return int(np.ceil(m / c_δ * (np.log(2 * m) - np.log(p))))
[docs]class OWLSUnivariateSampler(Sampler): r"""Sampler for univariate optimally distributed samples on given domain. Given a stochastic variable :math:`y\in\Gamma\subset\mathbb{R}` with :math:`y\sim\pi` and a finite subset :math:`\{P_j\}_{j=0}^{d-1}` of an orthonormal polynomial basis of :math:`L^2(\Gamma,\pi)`, the optimal weighted least-squares sampling distribution for a function :math:`u\in\operatorname{span}\{P_j\ \vert\ j=0,\dots,d-1 \}` reads .. math:: \mathrm{d}\mu = w^{-1} \mathrm{d}\pi \qquad\mbox{with weight}\qquad w^{-1}(y) = \frac{1}{d}\sum_{j=0}^{d-1}\vert P_j(y)\vert^2. Parameters ---------- domain : array_like Interval of support of distribution. Notes ----- To generate samples from the weighted least-squares distribution rejection sampling is used. For certain basis functions it is possible to choose a well-suited trial sampler for the rejection sampling, which can be enabled via setting ``tsa=True``. See Also -------- pythia.sampler.OWLSTensorSampler References ---------- The optimal weighted least-squares sampling is based on the results in Cohen & Migliorati [1]_. .. [1] Cohen, A. and Migliorati, G., “Optimal weighted least-squares methods”, SMAI Journal of Computational Mathematics 3, 181-203 (2017). """ def __init__(self, param: Parameter, deg: int, tsa: bool = True) -> None: """Initiate OWLSUnivariateSampler object.""" self.parameter = param self.deg = deg self._base_sampler = assign_sampler(param) self.domain = self._base_sampler.domain self._basis = pt_basis.univariate_basis([self.parameter], [self.deg])[0] self._tsa = tsa self._trial_sampler, self._bulk = self._compute_trial_sampler() assert self.domain.shape == (1, 2) # self._length = self.domain[0, 1] - self.domain[0, 0] @property def dimension(self) -> int: """Parameter dimension.""" return self.domain.shape[0] @property def mass(self) -> float: """Mass of the PDF.""" return 1 @property def maximum(self) -> float: """Maximum value of the PDF.""" if self.parameter.distribution == "uniform": # alternatively return pdf in domain[0, 1] maximum = self.pdf(self.domain[0, 0]) assert isinstance(maximum, float) return maximum elif self.parameter.distribution == "normal": if self.deg % 2 == 0: assert self.parameter.mean is not None maximum = self.pdf(self.parameter.mean) assert isinstance(maximum, float) return maximum else: # Note: this can be improved return get_maximum(self.pdf, self.domain) elif self.parameter.distribution == "gamma": # Note: this is probably not going to work for domain = [a, inf] return get_maximum(self.pdf, self.domain) elif self.parameter.distribution == "beta": # Note: this can be improved return get_maximum(self.pdf, self.domain) return get_maximum(self.pdf, self.domain) @property def mean(self) -> float: """Mean value of the distribution.""" mean, err = quad( lambda x: x * self.pdf(x), self.domain[0, 0], self.domain[0, 1] ) if err > 1e-8: warnings.warn(f"quadrature error large: ({err})") return mean @property def cov(self) -> float: """(Co)Variance of the distribution.""" moment, err = quad( lambda x: x**2 * self.pdf(x), self.domain[0, 0], self.domain[0, 1] ) if err > 1e-8: warnings.warn(f"quadrature error large: ({err})") return moment - self.mean**2 @property def var(self) -> float: """Variance of the distribution.""" return self.cov @property def std(self) -> float: """Standard deviation of the distribution.""" return np.sqrt(self.var)
[docs] def weight(self, x: np.ndarray | float | int) -> np.ndarray: """Weights for the pdf. Parameters ---------- x : np.ndarray Points the weight function is evaluated in. Returns ------- w : array_like Weights of evaluation points `x`. """ if isinstance(x, (float, int)): x = np.array(x).reshape(1) if x.ndim == 2 and x.shape[1] == 1: x = np.array(x).reshape(-1) assert x.ndim < 2 b_eval = np.sum([np.abs(p(x)) ** 2 for p in self._basis], axis=0) return (self.deg + 1) / b_eval
[docs] def pdf(self, x: np.ndarray | np.floating | float | int) -> np.ndarray: """Evaluate uniform PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of PDF evaluated in `x`. """ x = np.asarray(x) return self._base_sampler.pdf(x) / self.weight(x)
[docs] def log_pdf(self, x: np.ndarray | float | int) -> np.ndarray: """Evaluate uniform log-PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of log-PDF evaluated in `x`. """ if isinstance(x, (float, int)): x = np.array(x).reshape(1) if x.ndim == 2 and x.shape[1] == 1: x = np.array(x).reshape(-1) return self._base_sampler.log_pdf(x) - np.log(self.weight(x))
[docs] def grad_x_log_pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate gradient of uniform log-PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of gradient (vector valued) of log-PDF evaluated in `x`. """ raise NotImplementedError
[docs] def hess_x_log_pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate Hessian of uniform log-PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of Hessian (matrix valued) of log-PDF evaluated in `x`. """ raise NotImplementedError
def _compute_trial_sampler(self) -> tuple[Sampler, float]: """Trial sampler adaptation. .. note:: TSA currently only available for uniform parameter distribution. Parameters ---------- tsa : bool Adapt trial sampler or simply use uniform product sampler. Returns ------- trial_sampler : pt.sampler.Sampler Trial sampler. bulk : array_like Domain estimate of the mass of the distribution. """ trial_sampler: Sampler if self._tsa is True and self.parameter.distribution == "uniform": trial_sampler = BetaSampler(self.parameter.domain, 0.5, 0.5) else: trial_sampler = UniformSampler(self.parameter.domain) bulk = get_maximum(lambda x: self.pdf(x) / trial_sampler.pdf(x), self.domain) return trial_sampler, bulk
[docs] def sample(self, shape: int | list | tuple | np.ndarray) -> np.ndarray: """Draw samples from weighted least-squares parameter distribution. Parameters ---------- shape : array_like Shape of the samples. Returns ------- : Random samples of specified shape. """ if isinstance(shape, int): shape = (shape,) samples = rejection_sampling( self.pdf, self._trial_sampler, self._bulk, self.dimension, shape ) return samples
[docs]class OWLSSampler(Sampler): r"""Weighted Least-Squares sampler. Given a stochastic variable :math:`y\in\Gamma\subset\mathbb{R}^M` with :math:`y\sim\pi`, a set of multiindices :math:`\Lambda\subset\mathbb{N}_0^M` and a finite subset :math:`\{P_\alpha\}_{\alpha\in\Lambda}` of an orthonormal polynomial basis of :math:`L^2(\Gamma,\pi)`, the optimal weighted least-squares sampling distribution for a function :math:`u\in\operatorname{span}\{P_\alpha\ \vert\ \alpha\in\Lambda\}` reads .. math:: \mathrm{d}\mu = w^{-1} \mathrm{d}\pi \qquad\mbox{with weight}\qquad w^{-1}(y) = \frac{1}{|\Lambda|}\sum_{\alpha\in\Lambda} |P_\alpha(y)|^2, where :math:`\vert\Lambda\vert` denotes the number of elements in :math:`\Lambda`. Parameters ---------- params : list of `pythia.parameter.Parameter` list of parameters. basis : list list of basis functions. tsa : bool, default=False Trial sampler adaptation. If True, a trial sampler is chosen on the distributions of parameters, if false a uniform trial sampler is used. Other Parameters ---------------- trial_sampler : pythia.sampler.Sampler, default=None Trial sampler for rejection sampling. If `tsa` is true and either `trial_sampler` or `bulk` are `None`, the trial sampler is chosen automatically. bulk : float, defaul=None Scaling for trial sampler. If `tsa` is true and either `trial_sampler` or `bulk` are `None`, the trial sampler is chosen automatically. Notes ----- To generate samples from the weighted least-squares distribution rejection sampling is used. For certain basis functions it is possible to choose a well-suited trial sampler for the rejection sampling, which can be enabled via setting ``tsa=True``. See Also -------- pythia.sampler.OWLSUnivariateSampler, pythia.sampler.OWLSTensorSampler References ---------- The optimal weighted least-squares sampling is based on the results of Cohen & Migliorati [1]_. """ def __init__( self, params: list[Parameter], basis: list[Callable], tsa: bool = True, trial_sampler: Sampler | None = None, bulk: float | None = None, ) -> None: """Initiate OWLSSampler object.""" self.parameter = params self._param_sampler = ParameterSampler(self.parameter) self.domain = self._param_sampler.domain self.basis = basis self._tsa = tsa if trial_sampler is None or bulk is None: self._trial_sampler, self._bulk = self._compute_trial_sampler() else: self._trial_sampler = trial_sampler self._bulk = bulk @property def dimension(self) -> int: """Dimension of the parameters.""" return self._param_sampler.dimension @property def mass(self) -> float: """Mass of the PDF.""" return 1 @property def maximum(self) -> float: """Maximum value of the PDF.""" return get_maximum(self.pdf, self.domain) @property def mean(self) -> np.ndarray: """Mean of the PDF.""" raise NotImplementedError @property def cov(self) -> np.ndarray: """Covariance of the PDF.""" raise NotImplementedError
[docs] def weight(self, x: np.ndarray) -> np.ndarray: """Weights for the PDF. Parameters ---------- x : array_like Points the weight function is evaluated in. Returns ------- : weights of evaluation points `x`. """ km = np.sum([abs(p(x)) ** 2 for p in self.basis], axis=0) return len(self.basis) / km
[docs] def pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of PDF evaluated in `x`. """ assert x.shape[-1] == self.dimension return self._param_sampler.pdf(x) / self.weight(x)
[docs] def log_pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate log-PDF. The log-PDF is given by the sum of the univariate log-PDFs. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of log-PDF evaluated in `x`. """ assert x.shape[-1] == self.dimension return self._param_sampler.log_pdf(x) - np.log(self.weight(x))
[docs] def grad_x_log_pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate gradient of log-PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of gradient (vector valued) of log-PDF evaluated in `x`. """ raise NotImplementedError
[docs] def hess_x_log_pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate Hessian of log-PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of Hessian (matrix valued) of log-PDF evaluated in `x`. """ raise NotImplementedError
def _compute_trial_sampler(self) -> tuple[Sampler, float]: """Trial sampler adaptation. Parameters ---------- tsa : bool Adapt trial sampler or simply use uniform product sampler. Returns ------- trial_sampler : pt.sampler.Sampler Trial sampler. bulk : array_like Domain estimate of the mass of the distribution. """ sampler_list: list[Sampler] = [] for param in self.parameter: if self._tsa is True and param.distribution == "uniform": sampler_list.append(BetaSampler(param.domain, 0.5, 0.5)) else: sampler_list.append(UniformSampler(param.domain)) trial_sampler = ProductSampler(sampler_list) bulk = get_maximum(lambda x: self.pdf(x) / trial_sampler.pdf(x), self.domain) return trial_sampler, bulk
[docs] def sample(self, shape: int | list | tuple | np.ndarray) -> np.ndarray: """Draw samples from distribution. Parameters ---------- shape : array_like Shape of the samples. Returns ------- : Random samples of specified shape. """ if isinstance(shape, int): shape = (shape,) samples = rejection_sampling( self.pdf, self._trial_sampler, self._bulk, self.dimension, shape ) return samples
[docs]class OWLSTensorSampler(Sampler): r"""Weighted least-squares sampler for tensor multivariate basis. Given a stochastic variable :math:`y\in\Gamma\subset\mathbb{R}^M` with :math:`y\sim\pi=\prod_{m=1}^M\pi_m` for one dimensional densities :math:`\pi_m`, a tensor set of multiindices :math:`\Lambda=[d_1]\times\dots\times[d_M]\subset\mathbb{N}_0^M`, where :math:`[d_m]=\{0,\dots,d_m-1\}`, and a finite subset :math:`\{P_\alpha\}_{\alpha\in\Lambda}` of an orthonormal product polynomial basis of :math:`L^2(\Gamma,\pi)`, i.e., :math:`P_\alpha(y) = \prod_{m=1}^M P_{\alpha_m}(y_m)`, the optimal weighted least-squares sampling distribution for a function :math:`u\in\operatorname{span}\{P_\alpha\ \vert\ \alpha\in\Lambda\}` reads .. math:: \mathrm{d}\mu = w^{-1} \mathrm{d}\pi \qquad\mbox{with weight}\qquad w^{-1}(y) = \prod_{m=1}^M \frac{1}{d_m} \sum_{\alpha_m\in[d_m]} |P_{\alpha_m}(y_m)|^2. Parameters ---------- params : list of `pythia.parameter.Parameter` Parameter list. deg : list of int Polynomial degree of each component (same for all). tsa : bool, default=True Trial sampler adaptation. If True, a trial sampler is chosen on the distributions of parameters, if false a uniform trial sampler is used. Notes ----- To generate samples from the weighted least-squares distribution rejection sampling is used. For certain basis functions it is possible to choose a well-suited trial sampler for the rejection sampling, which can be enabled via setting ``tsa=True``. See Also -------- pythia.sampler.OWLSUnivariateSampler References ---------- The optimal weighted least-squares sampling is based on the results in Cohen & Migliorati [1]_. """ def __init__( self, params: list[Parameter], deg: list[int], tsa: bool = True ) -> None: """Initiate OWLSTensorSampler object.""" self.parameter = params self.deg = deg self.domain = np.array([param.domain for param in self.parameter]) self._tsa = tsa # self._uBasis = pt_basis.univariate_basis(self.parameter, self.deg) # self._univariate_samplers = [assign_sampler(param) # for param in self.parameter] self._univariate_samplers = [ OWLSUnivariateSampler(p, d, self._tsa) for (p, d) in zip(self.parameter, self.deg) ] self._product_sampler = ProductSampler(self._univariate_samplers) @property def dimension(self) -> int: """Dimension of the parameters.""" return self._product_sampler.dimension @property def mass(self) -> float: """Mass of the PDF.""" return self._product_sampler.mass @property def maximum(self) -> float: """Maximum value of the PDF.""" return self._product_sampler.maximum @property def mean(self) -> np.ndarray: """Mean of the PDF.""" return self._product_sampler.mean @property def cov(self) -> np.ndarray: """Covariance of the PDF.""" return self._product_sampler.cov
[docs] def weight(self, x: np.ndarray) -> np.ndarray: """Weights for the PDF. Parameters ---------- x : array_like Points the weight function is evaluated in. Returns ------- : Weights of evaluation points `x`. """ assert x.shape[-1] == self.dimension weights = [s.weight for s in self._univariate_samplers] val = np.array([weights[jj](x[..., jj]) for jj in range(self.dimension)]) return np.prod(val, axis=0)
[docs] def pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of PDF evaluated in `x`. """ return self._product_sampler.pdf(x)
[docs] def log_pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate log-PDF. The log-PDF is given by the sum of the univariate log-PDFs. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of log-PDF evaluated in `x`. """ return self._product_sampler.log_pdf(x)
[docs] def grad_x_log_pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate gradient of log-PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of gradient (vector valued) of log-PDF evaluated in `x`. """ return self._product_sampler.grad_x_log_pdf(x)
[docs] def hess_x_log_pdf(self, x: np.ndarray) -> np.ndarray: """Evaluate Hessian of log-PDF. Parameters ---------- x : array_like Evaluation points. Returns ------- : Values of Hessian (matrix valued) of log-PDF evaluated in `x`. """ return self._product_sampler.hess_x_log_pdf(x)
[docs] def sample(self, shape: int | list | tuple | np.ndarray) -> np.ndarray: """Draw samples from distribution. Parameters ---------- shape : array_like Shape of the samples. Returns ------- : Random samples of specified shape. """ return self._product_sampler.sample(shape)
[docs]@deprecated("WLSUnivariateSampler has been renamed to OWLSUnivariateSampler.") class WLSUnivariateSampler(OWLSUnivariateSampler): pass
[docs]@deprecated("WLSTensorSampler has been renamed to OWLSTensorSampler.") class WLSTensorSampler(OWLSTensorSampler): pass
[docs]@deprecated("WLSSampler has been renamed to OWLSSampler.") class WLSSampler(OWLSSampler): pass