Source code for gptools.kernel.matern

# Copyright 2014 Mark Chilenski
# This program is distributed under the terms of the GNU General Purpose License (GPL).
# Refer to http://www.gnu.org/licenses/gpl.txt
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Provides the :py:class:`MaternKernel` class which implements the anisotropic Matern kernel.
"""

from __future__ import division

import warnings

from .core import ChainRuleKernel, ArbitraryKernel, Kernel
from ..utils import generate_set_partitions, unique_rows, yn2Kn2Der, fixed_poch
try:
    from ._matern import _matern52
except ImportError:
    warnings.warn(
        "Could not import _matern, the extension might not have been built "
        "properly. The optimized Matern52Kernel will not be available.",
        ImportWarning
    )

import scipy
import scipy.special
try:
    import mpmath
except ImportError:
    warnings.warn("Could not import mpmath. Certain functions of the Matern kernel will not function.",
                  ImportWarning)

[docs]def matern_function(Xi, Xj, *args): r"""Matern covariance function of arbitrary dimension, for use with :py:class:`ArbitraryKernel`. The Matern kernel has the following hyperparameters, always referenced in the order listed: = ===== ==================================== 0 sigma prefactor 1 nu order of kernel 2 l1 length scale for the first dimension 3 l2 ...and so on for all dimensions = ===== ==================================== The kernel is defined as: .. math:: k_M = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} \left (\sqrt{2\nu \sum_i\left (\frac{\tau_i^2}{l_i^2}\right )}\right )^\nu K_\nu\left(\sqrt{2\nu \sum_i\left(\frac{\tau_i^2}{l_i^2}\right)}\right) Parameters ---------- Xi, Xj : :py:class:`Array`, :py:class:`mpf`, tuple or scalar float Points to evaluate the covariance between. If they are :py:class:`Array`, :py:mod:`scipy` functions are used, otherwise :py:mod:`mpmath` functions are used. *args Remaining arguments are the 2+num_dim hyperparameters as defined above. """ num_dim = len(args) - 2 nu = args[1] if isinstance(Xi, scipy.ndarray): if isinstance(Xi, scipy.matrix): Xi = scipy.asarray(Xi, dtype=float) Xj = scipy.asarray(Xj, dtype=float) tau = scipy.asarray(Xi - Xj, dtype=float) l_mat = scipy.tile(args[-num_dim:], (tau.shape[0], 1)) r2l2 = scipy.sum((tau / l_mat)**2, axis=1) y = scipy.sqrt(2.0 * nu * r2l2) k = 2.0**(1 - nu) / scipy.special.gamma(nu) * y**nu * scipy.special.kv(nu, y) k[r2l2 == 0] = 1 else: try: tau = [xi - xj for xi, xj in zip(Xi, Xj)] except TypeError: tau = Xi - Xj try: r2l2 = sum([(t / l)**2 for t, l in zip(tau, args[2:])]) except TypeError: r2l2 = (tau / args[2])**2 y = mpmath.sqrt(2.0 * nu * r2l2) k = 2.0**(1 - nu) / mpmath.gamma(nu) * y**nu * mpmath.besselk(nu, y) k *= args[0]**2.0 return k
[docs]class MaternKernelArb(ArbitraryKernel): r"""Matern covariance kernel. Supports arbitrary derivatives. Treats order as a hyperparameter. This version of the Matern kernel is painfully slow, but uses :py:mod:`mpmath` to ensure the derivatives are computed properly, since there may be issues with the regular :py:class:`MaternKernel`. The Matern kernel has the following hyperparameters, always referenced in the order listed: = ===== ==================================== 0 sigma prefactor 1 nu order of kernel 2 l1 length scale for the first dimension 3 l2 ...and so on for all dimensions = ===== ==================================== The kernel is defined as: .. math:: k_M = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} \left (\sqrt{2\nu \sum_i\left (\frac{\tau_i^2}{l_i^2}\right )}\right )^\nu K_\nu\left(\sqrt{2\nu \sum_i\left(\frac{\tau_i^2}{l_i^2}\right)}\right) Parameters ---------- **kwargs All keyword parameters are passed to :py:class:`~gptools.kernel.core.ArbitraryKernel`. """ def __init__(self, **kwargs): param_names = [r'\sigma_f', r'\nu'] + ['l_%d' % (i + 1,) for i in range(0, kwargs.get('num_dim', 1))] super(MaternKernelArb, self).__init__(matern_function, num_params=2 + kwargs.get('num_dim', 1), param_names=param_names, **kwargs) @property def nu(self): r"""Returns the value of the order :math:`\nu`. """ return self.params[1]
[docs]class MaternKernel1d(Kernel): r"""Matern covariance kernel. Only for univariate data. Only supports up to first derivatives. Treats order as a hyperparameter. The Matern kernel has the following hyperparameters, always referenced in the order listed: = ===== =============== 0 sigma prefactor 1 nu order of kernel 2 l1 length scale = ===== =============== The kernel is defined as: .. math:: k_M = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} \left (\sqrt{2\nu \left (\frac{\tau^2}{l_1^2}\right )}\right )^\nu K_\nu\left(\sqrt{2\nu \left(\frac{\tau^2}{l_1^2}\right)}\right) where :math:`\tau=X_i-X_j`. Note that the expressions for the derivatives break down for :math:`\nu < 1`. Parameters ---------- **kwargs All keyword parameters are passed to :py:class:`~gptools.kernel.core.Kernel`. """ def __init__(self, **kwargs): param_names = [r'\sigma_f', r'\nu', r'l_1'] super(MaternKernel1d, self).__init__( num_dim=1, num_params=3, param_names=param_names, **kwargs )
[docs] def __call__(self, Xi, Xj, ni, nj, hyper_deriv=None, symmetric=False): """Evaluate the covariance between points `Xi` and `Xj` with derivative order `ni`, `nj`. Parameters ---------- Xi : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` inputs with dimension `D`. Xj : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` inputs with dimension `D`. ni : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` derivative orders for set `i`. nj : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` derivative orders for set `j`. hyper_deriv : Non-negative int or None, optional The index of the hyperparameter to compute the first derivative with respect to. If None, no derivatives are taken. Hyperparameter derivatives are not supported at this point. Default is None. symmetric : bool, optional Whether or not the input `Xi`, `Xj` are from a symmetric matrix. Default is False. Returns ------- Kij : :py:class:`Array`, (`M`,) Covariances for each of the `M` `Xi`, `Xj` pairs. Raises ------ NotImplementedError If the `hyper_deriv` keyword is not None. """ if hyper_deriv is not None: raise NotImplementedError("Hyperparameter derivatives have not been implemented!") n_combined = scipy.asarray(scipy.hstack((ni, nj)), dtype=int) n_combined_unique = unique_rows(n_combined) x_y = scipy.asarray(Xi, dtype=float).ravel() - scipy.asarray(Xj, dtype=float).ravel() zero_mask = x_y == 0 q = scipy.sqrt(2 * self.params[1] * x_y**2) / self.params[2] k = scipy.zeros(Xi.shape[0], dtype=float) for n_combined_state in n_combined_unique: idxs = (n_combined == n_combined_state).all(axis=1) # Derviative expressions evaluated with Mathematica, assuming l>0. if (n_combined_state == scipy.asarray([0, 0])).all(): k[idxs] = q[idxs]**self.params[1] * scipy.special.kv(self.params[1], q[idxs]) k[idxs & zero_mask] = 2**(self.params[1] - 1) * scipy.special.gamma(self.params[1]) elif (n_combined_state == scipy.asarray([1, 0])).all(): k[idxs] = -1.0 / x_y[idxs] * q[idxs]**(1 + self.params[1]) * scipy.special.kv(self.params[1] - 1, q[idxs]) k[idxs & zero_mask] = 0.0 elif (n_combined_state == scipy.asarray([0, 1])).all(): k[idxs] = 1.0 / x_y[idxs] * q[idxs]**(1 + self.params[1]) * scipy.special.kv(self.params[1] - 1, q[idxs]) k[idxs & zero_mask] = 0.0 elif (n_combined_state == scipy.asarray([1, 1])).all(): k[idxs] = 2.0 * self.params[1] / (self.params[2])**2 * q[idxs]**self.params[1] * ( -scipy.special.kv(self.params[1] - 2, q[idxs]) + scipy.special.kv(self.params[1] - 1, q[idxs]) / q[idxs] ) # Had to assume nu > 1 for this to work: CHECK THIS! k[idxs & zero_mask] = 2**(self.params[1] - 1) * self.params[1] * scipy.special.gamma(self.params[1] - 1) / (self.params[2]**2) else: raise NotImplementedError("Derivatives greater than [1, 1] are not supported!") k = (self.params[0]**2 * 2**(1 - self.params[1]) / scipy.special.gamma(self.params[1])) * k return k
[docs]class MaternKernel(ChainRuleKernel): r"""Matern covariance kernel. Supports arbitrary derivatives. Treats order as a hyperparameter. The Matern kernel has the following hyperparameters, always referenced in the order listed: = ===== ==================================== 0 sigma prefactor 1 nu order of kernel 2 l1 length scale for the first dimension 3 l2 ...and so on for all dimensions = ===== ==================================== The kernel is defined as: .. math:: k_M = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} \left (\sqrt{2\nu \sum_i\left (\frac{\tau_i^2}{l_i^2}\right )}\right )^\nu K_\nu\left(\sqrt{2\nu \sum_i\left(\frac{\tau_i^2}{l_i^2}\right)}\right) Parameters ---------- num_dim : int Number of dimensions of the input data. Must be consistent with the `X` and `Xstar` values passed to the :py:class:`~gptools.gaussian_process.GaussianProcess` you wish to use the covariance kernel with. **kwargs All keyword parameters are passed to :py:class:`~gptools.kernel.core.ChainRuleKernel`. Raises ------ ValueError If `num_dim` is not a positive integer or the lengths of the input vectors are inconsistent. GPArgumentError If `fixed_params` is passed but `initial_params` is not. """ def __init__(self, num_dim=1, **kwargs): param_names = [r'\sigma_f', r'\nu'] + ['l_%d' % (i + 1,) for i in range(0, num_dim)] super(MaternKernel, self).__init__(num_dim=num_dim, num_params=num_dim + 2, param_names=param_names, **kwargs) def _compute_k(self, tau): r"""Evaluate the kernel directly at the given values of `tau`. Parameters ---------- tau : :py:class:`Matrix`, (`M`, `D`) `M` inputs with dimension `D`. Returns ------- k : :py:class:`Array`, (`M`,) :math:`k(\tau)` (less the :math:`\sigma^2` prefactor). """ y, r2l2 = self._compute_y(tau, return_r2l2=True) k = 2.0**(1.0 - self.nu) / scipy.special.gamma(self.nu) * y**(self.nu / 2.0) * scipy.special.kv(self.nu, scipy.sqrt(y)) k[r2l2 == 0] = 1.0 return k def _compute_y(self, tau, return_r2l2=False): r"""Covert tau to :math:`y=2\nu\sum_i(\tau_i^2/l_i^2)`. Parameters ---------- tau : :py:class:`Matrix`, (`M`, `D`) `M` inputs with dimension `D`. return_r2l2 : bool, optional Set to True to return a tuple of (`y`, `r2l2`). Default is False (only return `y`). Returns ------- y : :py:class:`Array`, (`M`,) Inner argument of function. r2l2 : :py:class:`Array`, (`M`,) Anisotropically scaled distances. Only returned if `return_r2l2` is True. """ r2l2 = self._compute_r2l2(tau) y = 2.0 * self.nu * r2l2 if return_r2l2: return (y, r2l2) else: return y def _compute_y_wrapper(self, *args): r"""Convert tau to :math:`y=\sqrt{2\nu\sum_i(\tau_i^2/l_i^2)}`. Takes `tau` as an argument list for compatibility with :py:func:`mpmath.diff`. Parameters ---------- tau[0] : scalar float First element of `tau`. tau[1] : And so on... Returns ------- y : scalar float Inner part of Matern kernel at the given `tau`. """ return self._compute_y(scipy.atleast_2d(scipy.asarray(args, dtype=float))) def _compute_dk_dy(self, y, n): r"""Evaluate the derivative of the outer form of the Matern kernel. Uses the general Leibniz rule to compute the n-th derivative of: .. math:: f(y) = \frac{2^{1-\nu}}{\Gamma(\nu)} y^{\nu/2} K_\nu(y^{1/2}) Parameters ---------- y : :py:class:`Array`, (`M`,) `M` inputs to evaluate at. n : non-negative scalar int. Order of derivative to compute. Returns ------- dk_dy : :py:class:`Array`, (`M`,) Specified derivative at specified locations. """ return 2.0**(1 - self.nu) / (scipy.special.gamma(self.nu)) * yn2Kn2Der(self.nu, y, n=n) def _compute_dy_dtau(self, tau, b, r2l2): r"""Evaluate the derivative of the inner argument of the Matern kernel. Take the derivative of .. math:: y = 2 \nu \sum_i(\tau_i^2 / l_i^2) Parameters ---------- tau : :py:class:`Matrix`, (`M`, `D`) `M` inputs with dimension `D`. b : :py:class:`Array`, (`P`,) Block specifying derivatives to be evaluated. r2l2 : :py:class:`Array`, (`M`,) Precomputed anisotropically scaled distance. Returns ------- dy_dtau: :py:class:`Array`, (`M`,) Specified derivative at specified locations. """ if len(b) == 0: return self._compute_y(tau) elif len(b) == 1: return 4.0 * self.nu * tau[:, b[0]] / (self.params[2 + b[0]])**2.0 elif (len(b) == 2) and (b[0] == b[1]): return 4.0 * self.nu / (self.params[2 + b[0]])**2.0 else: return scipy.zeros_like(r2l2) def _compute_dk_dtau_on_partition(self, tau, p): """Evaluate the term inside the sum of Faa di Bruno's formula for the given partition. Overrides the version from :py:class:`gptools.kernel.core.ChainRuleKernel` in order to get the correct behavior at the origin. Parameters ---------- tau : :py:class:`Matrix`, (`M`, `D`) `M` inputs with dimension `D`. p : list of :py:class:`Array` Each element is a block of the partition representing the derivative orders to use. Returns ------- dk_dtau : :py:class:`Array`, (`M`,) The specified derivatives over the given partition at the specified locations. """ # Find the derivative order: n = len(p) y, r2l2 = self._compute_y(tau, return_r2l2=True) # Keep track of how many times a given variable has a block of length 1: n1 = 0 # Build the dy/dtau factor up iteratively: dy_dtau_factor = scipy.ones_like(y) for b in p: # If the partial derivative is exactly zero there is no sense in # continuing the computation: if (len(b) > 2) or ((len(b) == 2) and (b[0] != b[1])): return scipy.zeros_like(y) dy_dtau_factor *= self._compute_dy_dtau(tau, b, r2l2) # Count the number of blocks of length 1: if len(b) == 1: n1 += 1.0 # Compute d^(|pi|)f/dy^(|pi|) term: dk_dy = self._compute_dk_dy(y, n) if n1 > 0: mask = (y == 0.0) tau_pow = 2 * (self.nu - n) + n1 if tau_pow == 0: # In this case the limit does not exist, so it is set to NaN: dk_dy[mask] = scipy.nan elif tau_pow > 0: dk_dy[mask] = 0.0 return dk_dy * dy_dtau_factor @property def nu(self): r"""Returns the value of the order :math:`\nu`. """ return self.params[1]
[docs]class Matern52Kernel(Kernel): r"""Matern 5/2 covariance kernel. Supports only 0th and 1st derivatives and is fixed at nu=5/2. Because of these limitations, it is quite a bit faster than the more general Matern kernels. The Matern52 kernel has the following hyperparameters, always referenced in the order listed: = ===== ==================================== 0 sigma prefactor 1 l1 length scale for the first dimension 2 l2 ...and so on for all dimensions = ===== ==================================== The kernel is defined as: .. math:: k_M(x, x') = \sigma^2 \left(1 + \sqrt{5r^2} + \frac{5}{3}r^2\right) \exp(-\sqrt{5r^2}) \\ r^2 = \sum_{d=1}^D \frac{(x_d - x'_d)^2}{l_d^2} Parameters ---------- num_dim : int Number of dimensions of the input data. Must be consistent with the `X` and `Xstar` values passed to the :py:class:`~gptools.gaussian_process.GaussianProcess` you wish to use the covariance kernel with. **kwargs All keyword parameters are passed to :py:class:`~gptools.kernel.core.Kernel`. Raises ------ ValueError If `num_dim` is not a positive integer or the lengths of the input vectors are inconsistent. GPArgumentError If `fixed_params` is passed but `initial_params` is not. """ def __init__(self, num_dim=1, **kwargs): param_names = [r'\sigma_f'] + ['l_%d' % (i + 1,) for i in range(0, num_dim)] super(Matern52Kernel, self).__init__(num_dim=num_dim, num_params=num_dim + 1, param_names=param_names, **kwargs)
[docs] def __call__(self, Xi, Xj, ni, nj, hyper_deriv=None, symmetric=False): """Evaluate the covariance between points `Xi` and `Xj` with derivative order `ni`, `nj`. Parameters ---------- Xi : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` inputs with dimension `D`. Xj : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` inputs with dimension `D`. ni : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` derivative orders for set `i`. nj : :py:class:`Matrix` or other Array-like, (`M`, `D`) `M` derivative orders for set `j`. hyper_deriv : Non-negative int or None, optional The index of the hyperparameter to compute the first derivative with respect to. If None, no derivatives are taken. Hyperparameter derivatives are not supported at this point. Default is None. symmetric : bool Whether or not the input `Xi`, `Xj` are from a symmetric matrix. Default is False. Returns ------- Kij : :py:class:`Array`, (`M`,) Covariances for each of the `M` `Xi`, `Xj` pairs. Raises ------ NotImplementedError If the `hyper_deriv` keyword is not None. """ if hyper_deriv is not None: raise NotImplementedError("Hyperparameter derivatives have not been implemented!") if scipy.any(scipy.sum(ni, axis=1) > 1) or scipy.any(scipy.sum(nj, axis=1) > 1): raise ValueError("Matern52Kernel only supports 0th and 1st order derivatives") Xi = scipy.asarray(Xi, dtype=scipy.float64) Xj = scipy.asarray(Xj, dtype=scipy.float64) ni = scipy.array(ni, dtype=scipy.int32) nj = scipy.array(nj, dtype=scipy.int32) var = scipy.square(self.params[-self.num_dim:]) value = _matern52(Xi, Xj, ni, nj, var) return self.params[0]**2 * value