Source code for gptools.gaussian_process

# Copyright 2014 Mark Chilenski
# This program is distributed under the terms of the GNU General Public License (GPL).
# Refer to http://www.gnu.org/licenses/gpl.txt
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Provides the base :py:class:`GaussianProcess` class.
"""

from __future__ import division

from .error_handling import GPArgumentError, GPImpossibleParamsError
from .kernel import Kernel, ZeroKernel, DiagonalNoiseKernel
from .utils import wrap_fmin_slsqp, univariate_envelope_plot, CombinedBounds, unique_rows, plot_sampler, summarize_sampler

import scipy
import scipy.linalg
import scipy.optimize
import scipy.stats
import numpy.random
import numpy.linalg
import sys
import warnings
import traceback
import multiprocessing
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
try:
    import emcee
    from emcee.interruptible_pool import InterruptiblePool
except ImportError:
    warnings.warn(
        "Could not import emcee: MCMC sampling will not be available.",
        ImportWarning
    )
    InterruptiblePool = multiprocessing.Pool
try:
    import triangle
except ImportError:
    warnings.warn(
        "Could not import triangle: plotting of MCMC results will not be available.",
        ImportWarning
    )

[docs]class GaussianProcess(object): r"""Gaussian process. If called with one argument, an untrained Gaussian process is constructed and data must be added with the :py:meth:`add_data` method. If called with the optional keywords, the values given are used as the data. It is always possible to add additional data with :py:meth:`add_data`. Note that the attributes have no write protection, but you should always add data with :py:meth:`add_data` to ensure internal consistency. Parameters ---------- k : :py:class:`~gptools.kernel.core.Kernel` instance Kernel instance corresponding to the desired noise-free covariance kernel of the Gaussian process. The noise is handled separately either through specification of `err_y`, or in a separate kernel. This allows noise-free predictions when needed. noise_k : :py:class:`~gptools.kernel.core.Kernel` instance Kernel instance corresponding to the noise portion of the desired covariance kernel of the Gaussian process. Note that you DO NOT need to specify this if the extent of the noise you want to represent is contained in `err_y` (or if your data are noiseless). Default value is None, which results in the :py:class:`~gptools.kernel.noise.ZeroKernel` (noise specified elsewhere or not present). diag_factor : float, optional Factor of :py:attr:`sys.float_info.epsilon` which is added to the diagonal of the total `K` matrix to improve the stability of the Cholesky decomposition. If you are having issues, try increasing this by a factor of 10 at a time. Default is 1e2. mu : :py:class:`~gptools.mean.MeanFunction` instance The mean function of the Gaussian process. Default is None (zero mean prior). NOTE The following are all passed to :py:meth:`add_data`, refer to its docstring. X : array, (`N`, `D`), optional `M` input values of dimension `D`. Default value is None (no data). y : array, (`M`,), optional `M` data target values. Default value is None (no data). err_y : array, (`M`,), optional Error (given as standard deviation) in the `M` training target values. Default value is 0 (noiseless observations). n : array, (`N`, `D`) or scalar float, optional Non-negative integer values only. Degree of derivative for each target. If `n` is a scalar it is taken to be the value for all points in `y`. Otherwise, the length of n must equal the length of `y`. Default value is 0 (observation of target value). If non-integer values are passed, they will be silently rounded. T : array, (`M`, `N`), optional Linear transformation to get from latent variables to data in the argument `y`. When `T` is passed the argument `y` holds the transformed quantities `y=TY(X)` where `y` are the observed values of the transformed quantities, `T` is the transformation matrix and `Y(X)` is the underlying (untransformed) values of the function to be fit that enter into the transformation. When `T` is `M`-by-`N` and `y` has `M` elements, `X` and `n` will both be `N`-by-`D`. Default is None (no transformation). use_hyper_deriv : bool, optional If True, the elements needed to compute the derivatives of the log-likelihood with respect to the hyperparameters will be computed. Default is False (do not compute these elements). Derivatives with respect to hyperparameters are currently only supported for the squared exponential covariance kernel. verbose : bool, optional If True, warnings which are produced internally but are not critical will not be generated. This does not control when a warning happens in a routine which is called, however. Default is False (do not produce warnings). 
Attributes ---------- num_dim : int Number of dimensions, `D`. This is actually a getter method with a property decorator which retrieves the value from :py:attr:`k`. k : :py:class:`~gptools.kernel.core.Kernel` instance The non-noise portion of the covariance kernel. noise_k : :py:class:`~gptools.kernel.core.Kernel` instance The noise portion of the covariance kernel. mu : :py:class:`~gptools.mean.MeanFunction` instance Parametric mean function. hyperprior : :py:class:`~gptools.utils.JointPrior` instance Prior distribution for the hyperparameters. This is actually a getter method with a property decorator which combines the prior distributions from :py:attr:`k`, :py:attr:`noise_k` and :py:attr:`mu`. X : array, (`M`, `D`) The `M` training input values, each of which is of dimension `D`. n : array, (`M`, `D`) The orders of derivatives that each of the `M` training points represent, indicating the order of derivative with respect to each of the `D` dimensions. T : array, (`M`, `N`) The transformation matrix applied to the training data. If this is not None, `X` and `n` will be `N`-by-`D`. y : array, (`M`,) The `M` training target values. err_y : array, (`M`,) The uncorrelated, possibly heteroscedastic uncertainty in the `M` training input values. K : array, (`M`, `M`) Covariance matrix between all of the training inputs. noise_K : array, (`M`, `M`) Noise portion of the covariance matrix between all of the training inputs. Note that if :py:attr:`T` is present this is applied between all of the training quadrature points, so additional noise on the transformed observations cannot be inferred. L : array, (`M`, `M`) Lower-triangular Cholesky decomposition of the total covariance matrix between all of the training inputs. alpha : array, (`M`, 1) Solution to :math:`K\alpha = y`. ll : float Log-posterior density of the model. diag_factor : float The factor of :py:attr:`sys.float_info.epsilon` which is added to the diagonal of the :py:attr:`K` matrix to improve stability. K_up_to_date : bool True if no data have been added since the last time the internal state was updated with a call to :py:meth:`compute_K_L_alpha_ll`. use_hyper_deriv : bool Whether or not the derivative of the log-posterior with respect to the hyperparameters should be computed/used. verbose : bool Whether or not to print non-critical, internally-generated warnings. params : :py:class:`~gptools.utils.CombinedBounds` The current values of the hyperparameters for the covariance kernel, noise covariance kernel and mean function (in that order). This is actually a getter method with a property decorator which returns a :py:class:`~gptools.utils.CombinedBounds` instance. This permits the hyperparameters to be modified in place. param_bounds : :py:class:`~gptools.utils.CombinedBounds` The current bounds on the hyperparameters for the covariance kernel, noise covariance kernel and mean function (in that order). This is actually a getter method with a property decorator which returns a :py:class:`~gptools.utils.CombinedBounds` instance. This permits the bounds to be modified in place, assuming the hyperprior supports it. param_names : :py:class:`~gptools.utils.CombinedBounds` The names of the hyperparameters for the covariance kernel, noise covariance kernel and mean function (in that order). This is actually a getter method with a property decorator which returns a :py:class:`~gptools.utils.CombinedBounds` instance. This permits the names to be modified in place. 
fixed_params : :py:class:`~gptools.utils.CombinedBounds` A list of boolean flags indicating which of the hyperparameters of the covariance kernel, noise covariance kernel and mean function (in that order) are to be held fixed while optimizing or sampling the hyperparameters. This is actually a getter method with a property decorator which returns a :py:class:`~gptools.utils.CombinedBounds` instance. This permits the flags to be modified in place. free_params : :py:class:`~gptools.utils.CombinedBounds` The current values of the free hyperparameters for the covariance kernel, noise covariance kernel and mean function (in that order). This is actually a getter method with a property decorator which returns a :py:class:`~gptools.utils.CombinedBounds` instance. This permits the hyperparameters to be modified in place. free_param_bounds : :py:class:`~gptools.utils.CombinedBounds` The current bounds on the free hyperparameters for the covariance kernel, noise covariance kernel and mean function (in that order). This is actually a getter method with a property decorator which returns a :py:class:`~gptools.utils.CombinedBounds` instance. This permits the bounds to be modified in place, assuming the hyperprior supports it. free_param_names : :py:class:`~gptools.utils.CombinedBounds` The names of the free hyperparameters for the covariance kernel, noise covariance kernel and mean function (in that order). This is actually a getter method with a property decorator which returns a :py:class:`~gptools.utils.CombinedBounds` instance. This permits the names to be modified in place. Raises ------ TypeError `k` or `noise_k` is not an instance of :py:class:`~gptools.kernel.core.Kernel`. GPArgumentError Gave `X` but not `y` (or vice versa). ValueError Training data rejected by :py:meth:`add_data`. See Also -------- add_data : Used to process `X`, `y`, `err_y` and to add data. """ def __init__(self, k, noise_k=None, X=None, y=None, err_y=0, n=0, T=None, diag_factor=1e2, mu=None, use_hyper_deriv=False, verbose=False): if not isinstance(k, Kernel): raise TypeError( "Argument k must be an instance of Kernel when constructing " "GaussianProcess!" ) if noise_k is None: noise_k = ZeroKernel(k.num_dim) else: if not isinstance(noise_k, Kernel): raise TypeError( "Keyword noise_k must be an instance of Kernel when " "constructing GaussianProcess!" ) self.mu = mu self.diag_factor = diag_factor self.k = k self.noise_k = noise_k self.use_hyper_deriv = use_hyper_deriv self.verbose = verbose # Set the placeholder shapes: self.y = scipy.array([], dtype=float) self.X = None self.err_y = scipy.array([], dtype=float) self.n = None self.T = None if X is not None: if y is None: raise GPArgumentError( "Must pass both X and y when constructing GaussianProcess!" ) else: self.add_data(X, y, err_y=err_y, n=n, T=T) elif X is None and y is not None: raise GPArgumentError( "Must pass both X and y when constructing GaussianProcess!" ) else: self.K_up_to_date = False # The following are getters/setters for the (hyper)parameters of the model. # Right now they pull from the kernel, noise kernel and mean function. # Modify them to add more complicated things you want to infer. # TODO: These getters don't handle assignment by index! @property def hyperprior(self): """Combined hyperprior for the kernel, noise kernel and (if present) mean function. """ hp = self.k.hyperprior * self.noise_k.hyperprior if self.mu is not None: hp *= self.mu.hyperprior return hp # TODO: Is there a clever way to globally set the hyperprior? 
# @hyperprior.setter # def hyperprior(self, value): # pass @property def fixed_params(self): """Combined fixed hyperparameter flags for the kernel, noise kernel and (if present) mean function. """ fp = CombinedBounds(self.k.fixed_params, self.noise_k.fixed_params) if self.mu is not None: fp = CombinedBounds(fp, self.mu.fixed_params) return fp @fixed_params.setter def fixed_params(self, value): value = scipy.asarray(value, dtype=bool) self.k.fixed_params = value[:self.k.num_params] self.noise_k.fixed_params = value[self.k.num_params:self.k.num_params + self.noise_k.num_params] if self.mu is not None: self.mu.fixed_params = value[self.k.num_params + self.noise_k.num_params:] @property def params(self): """Combined hyperparameters for the kernel, noise kernel and (if present) mean function. """ p = CombinedBounds(self.k.params, self.noise_k.params) if self.mu is not None: p = CombinedBounds(p, self.mu.params) return p @params.setter def params(self, value): value = scipy.asarray(value, dtype=float) self.K_up_to_date = False self.k.params = value[:self.k.num_params] self.noise_k.params = value[self.k.num_params:self.k.num_params + self.noise_k.num_params] if self.mu is not None: self.mu.params = value[self.k.num_params + self.noise_k.num_params:] @property def param_bounds(self): """Combined bounds for the hyperparameters for the kernel, noise kernel and (if present) mean function. """ return self.hyperprior.bounds @param_bounds.setter def param_bounds(self, value): self.hyperprior.bounds = value @property def param_names(self): """Combined names for the hyperparameters for the kernel, noise kernel and (if present) mean function. """ pn = CombinedBounds(self.k.param_names, self.noise_k.param_names) if self.mu is not None: pn = CombinedBounds(pn, self.mu.param_names) return pn @param_names.setter def param_names(self, value): self.k.param_names = value[:self.k.num_params] self.noise_k.param_names = value[self.k.num_params:self.k.num_params + self.noise_k.num_params] if self.mu is not None: self.mu.param_names = value[self.k.num_params + self.noise_k.num_params:] @property def free_params(self): """Combined free hyperparameters for the kernel, noise kernel and (if present) mean function. """ p = CombinedBounds(self.k.free_params, self.noise_k.free_params) if self.mu is not None: p = CombinedBounds(p, self.mu.free_params) return p @free_params.setter def free_params(self, value): """Set the free parameters. Note that this bypasses enforce_bounds. """ value = scipy.asarray(value, dtype=float) self.K_up_to_date = False self.k.free_params = value[:self.k.num_free_params] self.noise_k.free_params = value[self.k.num_free_params:self.k.num_free_params + self.noise_k.num_free_params] if self.mu is not None: self.mu.free_params = value[self.k.num_free_params + self.noise_k.num_free_params:] @property def free_param_bounds(self): """Combined free hyperparameter bounds for the kernel, noise kernel and (if present) mean function. 
""" fpb = CombinedBounds(self.k.free_param_bounds, self.noise_k.free_param_bounds) if self.mu is not None: fpb = CombinedBounds(fpb, self.mu.free_param_bounds) return fpb @free_param_bounds.setter def free_param_bounds(self, value): value = scipy.asarray(value, dtype=float) self.k.free_param_bounds = value[:self.k.num_free_params] self.noise_k.free_param_bounds = value[self.k.num_free_params:self.k.num_free_params + self.noise_k.num_free_params] if self.mu is not None: self.mu.free_param_bounds = value[self.k.num_free_params + self.noise_k.num_free_params:] @property def free_param_names(self): """Combined free hyperparameter names for the kernel, noise kernel and (if present) mean function. """ p = CombinedBounds(self.k.free_param_names, self.noise_k.free_param_names) if self.mu is not None: p = CombinedBounds(p, self.mu.free_param_names) return p @free_param_names.setter def free_param_names(self, value): value = scipy.asarray(value, dtype=str) self.K_up_to_date = False self.k.free_param_names = value[:self.k.num_free_params] self.noise_k.free_param_names = value[self.k.num_free_params:self.k.num_free_params + self.noise_k.num_free_params] if self.mu is not None: self.mu.free_param_names = value[self.k.num_free_params + self.noise_k.num_free_params:]
[docs] def add_data(self, X, y, err_y=0, n=0, T=None): """Add data to the training data set of the GaussianProcess instance. Parameters ---------- X : array, (`M`, `D`) `M` input values of dimension `D`. y : array, (`M`,) `M` target values. err_y : array, (`M`,) or scalar float, optional Non-negative values only. Error given as standard deviation) in the `M` target values. If `err_y` is a scalar, the data set is taken to be homoscedastic (constant error). Otherwise, the length of `err_y` must equal the length of `y`. Default value is 0 (noiseless observations). n : array, (`M`, `D`) or scalar float, optional Non-negative integer values only. Degree of derivative for each target. If `n` is a scalar it is taken to be the value for all points in `y`. Otherwise, the length of n must equal the length of `y`. Default value is 0 (observation of target value). If non-integer values are passed, they will be silently rounded. T : array, (`M`, `N`), optional Linear transformation to get from latent variables to data in the argument `y`. When `T` is passed the argument `y` holds the transformed quantities `y=TY(X)` where `y` are the observed values of the transformed quantities, `T` is the transformation matrix and `Y(X)` is the underlying (untransformed) values of the function to be fit that enter into the transformation. When `T` is `M`-by-`N` and `y` has `M` elements, `X` and `n` will both be `N`-by-`D`. Default is None (no transformation). Raises ------ ValueError Bad shapes for any of the inputs, negative values for `err_y` or `n`. """ # Verify y has only one non-trivial dimension: y = scipy.atleast_1d(scipy.asarray(y, dtype=float)) if len(y.shape) != 1: raise ValueError( "Training targets y must have only one dimension with length " "greater than one! Shape of y given is %s" % (y.shape,) ) # Handle scalar error or verify shape of array error matches shape of y: try: iter(err_y) except TypeError: err_y = err_y * scipy.ones_like(y, dtype=float) else: err_y = scipy.asarray(err_y, dtype=float) if err_y.shape != y.shape: raise ValueError( "When using array-like err_y, shape must match shape of y! " "Shape of err_y given is %s, shape of y given is %s." % (err_y.shape, y.shape) ) if (err_y < 0).any(): raise ValueError("All elements of err_y must be non-negative!") # Handle scalar training input or convert array input into 2d. X = scipy.atleast_2d(scipy.asarray(X, dtype=float)) # Correct single-dimension inputs: if self.num_dim == 1 and X.shape[0] == 1: X = X.T if T is None and X.shape != (len(y), self.num_dim): raise ValueError( "Shape of training inputs must be (len(y), k.num_dim)! X given " "has shape %s, shape of y is %s and num_dim=%d." % (X.shape, y.shape, self.num_dim) ) # Handle scalar derivative orders or verify shape of array derivative # orders matches shape of y: try: iter(n) except TypeError: n = n * scipy.ones_like(X, dtype=int) else: n = scipy.atleast_2d(scipy.asarray(n, dtype=int)) # Correct single-dimension inputs: if self.num_dim == 1 and n.shape[1] != 1: n = n.T if n.shape != X.shape: raise ValueError( "When using array-like n, shape must be (len(y), k.num_dim)! " "Shape of n given is %s, shape of y given is %s and num_dim=%d." 
% (n.shape, y.shape, self.num_dim) ) if (n < 0).any(): raise ValueError("All elements of n must be non-negative integers!") # Handle transform: if T is None and self.T is not None: T = scipy.eye(len(y)) if T is not None: T = scipy.atleast_2d(scipy.asarray(T, dtype=float)) if T.ndim != 2: raise ValueError("T must have exactly 2 dimensions!") if T.shape[0] != len(y): raise ValueError( "T must have as many rows are there are elements in y!" ) if T.shape[1] != X.shape[0]: raise ValueError( "There must be as many columns in T as there are rows in X!" ) if self.T is None and self.X is not None: self.T = scipy.eye(len(self.y)) if self.T is None: self.T = T else: self.T = scipy.linalg.block_diag(self.T, T) if self.X is None: self.X = X else: self.X = scipy.vstack((self.X, X)) self.y = scipy.append(self.y, y) self.err_y = scipy.append(self.err_y, err_y) if self.n is None: self.n = n else: self.n = scipy.vstack((self.n, n)) self.K_up_to_date = False
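
# Illustrative usage sketch for add_data: noisy observations of a 1d function
# plus an exact slope (first-derivative) constraint at x = 0. The
# SquaredExponentialKernel constructor is assumed and may differ by version.
import numpy as np
import gptools

x = np.linspace(0.0, 1.0, 10)
y = np.sin(2.0 * np.pi * x) + 0.01 * np.random.randn(10)
k = gptools.SquaredExponentialKernel(param_bounds=[(0.0, 20.0), (0.0, 5.0)])
gp = gptools.GaussianProcess(k)
gp.add_data(x, y, err_y=0.01)        # values with homoscedastic 1-sigma error
gp.add_data(0.0, 2.0 * np.pi, n=1)   # noiseless observation of dy/dx at x = 0
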
    def condense_duplicates(self):
        """Condense duplicate points using a transformation matrix.
        
        This is useful if you have multiple non-transformed points at the same
        location or multiple transformed points that use the same quadrature
        points. Won't change the GP if all of the rows of [X, n] are unique.
        Will create a transformation matrix T if necessary. Note that the order
        of the points in [X, n] will be arbitrary after this operation.
        
        If there are any transformed quantities (i.e., `self.T` is not None),
        it will also remove any quadrature points for which all of the weights
        are zero (even if all of the rows of [X, n] are unique).
        """
        unique, inv = unique_rows(
            scipy.hstack((self.X, self.n)), return_inverse=True
        )
        # Only proceed if there is anything to be gained:
        if len(unique) != len(self.X):
            if self.T is None:
                self.T = scipy.eye(len(self.y))
            new_T = scipy.zeros((len(self.y), unique.shape[0]))
            for j in xrange(0, len(inv)):
                new_T[:, inv[j]] += self.T[:, j]
            self.T = new_T
            self.n = unique[:, self.X.shape[1]:]
            self.X = unique[:, :self.X.shape[1]]
        # Also remove any points which don't enter into the calculation:
        if self.T is not None:
            # Find the columns of T which actually enter in:
            # Recall that T is (n, n_Q), X is (n_Q, n_dim).
            good_cols = (self.T != 0.0).any(axis=0)
            self.T = self.T[:, good_cols]
            self.X = self.X[good_cols, :]
            self.n = self.n[good_cols, :]
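
# Illustrative sketch of condense_duplicates: repeated rows of [X, n] are
# collapsed into single quadrature points and the bookkeeping is moved into the
# transformation matrix T (kernel constructor assumed as in the sketches above).
import gptools

k = gptools.SquaredExponentialKernel(param_bounds=[(0.0, 20.0), (0.0, 5.0)])
gp = gptools.GaussianProcess(k)
gp.add_data([0.0, 0.0, 1.0], [1.1, 0.9, 2.0], err_y=0.1)
gp.condense_duplicates()
print(gp.X.shape)   # (2, 1): only the unique input locations remain
print(gp.T.shape)   # (3, 2): three observations mapped onto two quadrature points
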
[docs] def remove_outliers(self, thresh=3, **predict_kwargs): """Remove outliers from the GP with very simplistic outlier detection. Removes points that are more than `thresh` * `err_y` away from the GP mean. Note that this is only very rough in that it ignores the uncertainty in the GP mean at any given point. But you should only be using this as a rough way of removing bad channels, anyways! Returns the values that were removed and a boolean array indicating where the removed points were. Parameters ---------- thresh : float, optional The threshold as a multiplier times `err_y`. Default is 3 (i.e., throw away all 3-sigma points). **predict_kwargs : optional kwargs All additional kwargs are passed to :py:meth:`predict`. You can, for instance, use this to make it use MCMC to evaluate the mean. (If you don't use MCMC, then the current value of the hyperparameters is used.) Returns ------- X_bad : array Input values of the bad points. y_bad : array Bad values. err_y_bad : array Uncertainties on the bad values. n_bad : array Derivative order of the bad values. bad_idxs : array Array of booleans with the original shape of X with True wherever a point was taken to be bad and subsequently removed. T_bad : array Transformation matrix of returned points. Only returned if :py:attr:`T` is not None for the instance. """ mean = self.predict( self.X, n=self.n, noise=False, return_std=False, output_transform=self.T, **predict_kwargs ) deltas = scipy.absolute(mean - self.y) / self.err_y deltas[self.err_y == 0] = 0 bad_idxs = (deltas >= thresh) good_idxs = ~bad_idxs # Pull out the old values so they can be returned: y_bad = self.y[bad_idxs] err_y_bad = self.err_y[bad_idxs] if self.T is not None: T_bad = self.T[bad_idxs, :] non_zero_cols = (T_bad != 0).all(axis=0) T_bad = T_bad[:, non_zero_cols] X_bad = self.X[non_zero_cols, :] n_bad = self.n[non_zero_cols, :] else: X_bad = self.X[bad_idxs, :] n_bad = self.n[bad_idxs, :] # Delete the offending points: if self.T is None: self.X = self.X[good_idxs, :] self.n = self.n[good_idxs, :] else: self.T = self.T[good_idxs, :] non_zero_cols = (self.T != 0).all(axis=0) self.T = self.T[:, non_zero_cols] self.X = self.X[non_zero_cols, :] self.n = self.n[non_zero_cols, :] self.y = self.y[good_idxs] self.err_y = self.err_y[good_idxs] self.K_up_to_date = False if self.T is None: return (X_bad, y_bad, err_y_bad, n_bad, bad_idxs) else: return (X_bad, y_bad, err_y_bad, n_bad, bad_idxs, T_bad)
[docs] def optimize_hyperparameters(self, method='SLSQP', opt_kwargs={}, verbose=False, random_starts=None, num_proc=None, max_tries=1): r"""Optimize the hyperparameters by maximizing the log-posterior. Leaves the :py:class:`GaussianProcess` instance in the optimized state. If :py:func:`scipy.optimize.minimize` is not available (i.e., if your :py:mod:`scipy` version is older than 0.11.0) then :py:func:`fmin_slsqp` is used independent of what you set for the `method` keyword. If :py:attr:`use_hyper_deriv` is True the optimizer will attempt to use the derivatives of the log-posterior with respect to the hyperparameters to speed up the optimization. Note that only the squared exponential covariance kernel supports hyperparameter derivatives at present. Parameters ---------- method : str, optional The method to pass to :py:func:`scipy.optimize.minimize`. Refer to that function's docstring for valid options. Default is 'SLSQP'. See note above about behavior with older versions of :py:mod:`scipy`. opt_kwargs : dict, optional Dictionary of extra keywords to pass to :py:func:`scipy.optimize.minimize`. Refer to that function's docstring for valid options. Default is: {}. verbose : bool, optional Whether or not the output should be verbose. If True, the entire :py:class:`Result` object from :py:func:`scipy.optimize.minimize` is printed. If False, status information is only printed if the `success` flag from :py:func:`minimize` is False. Default is False. random_starts : non-negative int, optional Number of times to randomly perturb the starting guesses (distributed according to the hyperprior) in order to seek the global minimum. If None, then `num_proc` random starts will be performed. Default is None (do number of random starts equal to the number of processors allocated). Note that for `random_starts` != 0, the initial state of the hyperparameters is not actually used. num_proc : non-negative int or None, optional Number of processors to use with random starts. If 0, processing is not done in parallel. If None, all available processors are used. Default is None (use all available processors). max_tries : int, optional Number of times to run through the random start procedure if a solution is not found. Default is to only go through the procedure once. 
""" if opt_kwargs is None: opt_kwargs = {} else: opt_kwargs = dict(opt_kwargs) if 'method' in opt_kwargs: method = opt_kwargs['method'] if self.verbose: warnings.warn( "Key 'method' is present in opt_kwargs, will override option " "specified with method kwarg.", RuntimeWarning ) else: opt_kwargs['method'] = method if num_proc is None: num_proc = multiprocessing.cpu_count() param_ranges = scipy.asarray(self.free_param_bounds, dtype=float) # Replace unbounded variables with something big: param_ranges[scipy.where(scipy.isnan(param_ranges[:, 0])), 0] = -1e16 param_ranges[scipy.where(scipy.isnan(param_ranges[:, 1])), 1] = 1e16 param_ranges[scipy.where(scipy.isinf(param_ranges[:, 0])), 0] = -1e16 param_ranges[scipy.where(scipy.isinf(param_ranges[:, 1])), 1] = 1e16 if random_starts == 0: num_proc = 0 param_samples = [self.free_params[:]] else: if random_starts is None: random_starts = max(num_proc, 1) # Distribute random guesses according to the hyperprior: param_samples = self.hyperprior.random_draw(size=random_starts).T param_samples = param_samples[:, ~self.fixed_params] if 'bounds' not in opt_kwargs: opt_kwargs['bounds'] = param_ranges if self.use_hyper_deriv: opt_kwargs['jac'] = True trial = 0 res_min = None while trial < max_tries and res_min is None: if trial >= 1: if self.verbose: warnings.warn( "No solutions found on trial %d, retrying random starts." % (trial - 1,), RuntimeWarning ) # Produce a new initial guess: if random_starts != 0: param_samples = self.hyperprior.random_draw(size=random_starts).T param_samples = param_samples[:, ~self.fixed_params] trial += 1 if num_proc > 1: pool = InterruptiblePool(processes=num_proc) map_fun = pool.map else: map_fun = map try: res = map_fun( _OptimizeHyperparametersEval(self, opt_kwargs), param_samples ) finally: if num_proc > 1: pool.close() # Filter out the failed convergences: res = [r for r in res if r is not None] try: res_min = min(res, key=lambda r: r.fun) if scipy.isnan(res_min.fun) or scipy.isinf(res_min.fun): res_min = None except ValueError: res_min = None if res_min is None: raise ValueError( "Optimizer failed to find a valid solution. Try changing the " "parameter bounds, picking a new initial guess or increasing the " "number of random starts." ) self.update_hyperparameters(res_min.x) if verbose: print("Got %d completed starts, optimal result is:" % (len(res),)) print(res_min) print("\nLL\t%.3g" % (-1 * res_min.fun)) for v, l in zip(res_min.x, self.free_param_names): print("%s\t%.3g" % (l.translate(None, '\\'), v)) if not res_min.success: warnings.warn( "Optimizer %s reports failure, selected hyperparameters are " "likely NOT optimal. Status: %d, Message: '%s'. Try adjusting " "bounds, initial guesses or the number of random starts used." % ( method, res_min.status, res_min.message ), RuntimeWarning ) bounds = scipy.asarray(self.free_param_bounds) # Augment the bounds a little bit to catch things that are one step away: if ((res_min.x <= 1.001 * bounds[:, 0]).any() or (res_min.x >= 0.999 * bounds[:, 1]).any()): warnings.warn( "Optimizer appears to have hit/exceeded the bounds. Bounds are:\n" "%s\n, solution is:\n%s. Try adjusting bounds, initial guesses " "or the number of random starts used." % (str(bounds), str(res_min.x),) ) return (res_min, len(res))
[docs] def predict(self, Xstar, n=0, noise=False, return_std=True, return_cov=False, full_output=False, return_samples=False, num_samples=1, samp_kwargs={}, return_mean_func=False, use_MCMC=False, full_MC=False, rejection_func=None, ddof=1, output_transform=None, **kwargs): """Predict the mean and covariance at the inputs `Xstar`. The order of the derivative is given by `n`. The keyword `noise` sets whether or not noise is included in the prediction. Parameters ---------- Xstar : array, (`M`, `D`) `M` test input values of dimension `D`. n : array, (`M`, `D`) or scalar, non-negative int, optional Order of derivative to predict (0 is the base quantity). If `n` is scalar, the value is used for all points in `Xstar`. If non-integer values are passed, they will be silently rounded. Default is 0 (return base quantity). noise : bool, optional Whether or not noise should be included in the covariance. Default is False (no noise in covariance). return_std : bool, optional Set to True to compute and return the standard deviation for the predictions, False to skip this step. Default is True (return tuple of (`mean`, `std`)). return_cov : bool, optional Set to True to compute and return the full covariance matrix for the predictions. This overrides the `return_std` keyword. If you want both the standard deviation and covariance matrix pre-computed, use the `full_output` keyword. full_output : bool, optional Set to True to return the full outputs in a dictionary with keys: ================= =========================================================================== mean mean of GP at requested points std standard deviation of GP at requested points cov covariance matrix for values of GP at requested points samp random samples of GP at requested points (only if `return_samples` is True) mean_func mean function of GP (only if `return_mean_func` is True) cov_func covariance of mean function of GP (zero if not using MCMC) std_func standard deviation of mean function of GP (zero if not using MCMC) mean_without_func mean of GP minus mean function of GP cov_without_func covariance matrix of just the GP portion of the fit std_without_func standard deviation of just the GP portion of the fit ================= =========================================================================== return_samples : bool, optional Set to True to compute and return samples of the GP in addition to computing the mean. Only done if `full_output` is True. Default is False. num_samples : int, optional Number of samples to compute. If using MCMC this is the number of samples per MCMC sample, if using present values of hyperparameters this is the number of samples actually returned. Default is 1. samp_kwargs : dict, optional Additional keywords to pass to :py:meth:`draw_sample` if `return_samples` is True. Default is {}. return_mean_func : bool, optional Set to True to return the evaluation of the mean function in addition to computing the mean of the process itself. Only done if `full_output` is True and `self.mu` is not None. Default is False. use_MCMC : bool, optional Set to True to use :py:meth:`predict_MCMC` to evaluate the prediction marginalized over the hyperparameters. full_MC : bool, optional Set to True to compute the mean and covariance matrix using Monte Carlo sampling of the posterior. The samples will also be returned if full_output is True. The sample mean and covariance will be evaluated after filtering through `rejection_func`, so conditional means and covariances can be computed. 
Default is False (do not use full sampling). rejection_func : callable, optional Any samples where this function evaluates False will be rejected, where it evaluates True they will be kept. Default is None (no rejection). Only has an effect if `full_MC` is True. ddof : int, optional The degree of freedom correction to use when computing the covariance matrix when `full_MC` is True. Default is 1 (unbiased estimator). output_transform: array, (`L`, `M`), optional Matrix to use to transform the output vector of length `M` to one of length `L`. This can, for instance, be used to compute integrals. **kwargs : optional kwargs All additional kwargs are passed to :py:meth:`predict_MCMC` if `use_MCMC` is True. Returns ------- mean : array, (`M`,) Predicted GP mean. Only returned if `full_output` is False. std : array, (`M`,) Predicted standard deviation, only returned if `return_std` is True, `return_cov` is False and `full_output` is False. cov : array, (`M`, `M`) Predicted covariance matrix, only returned if `return_cov` is True and `full_output` is False. full_output : dict Dictionary with fields for mean, std, cov and possibly random samples and the mean function. Only returned if `full_output` is True. Raises ------ ValueError If `n` is not consistent with the shape of `Xstar` or is not entirely composed of non-negative integers. """ if use_MCMC: res = self.predict_MCMC( Xstar, n=n, noise=noise, return_std=return_std or full_output, return_cov=return_cov or full_output, return_samples=full_output and (return_samples or rejection_func), return_mean_func=full_output and return_mean_func, num_samples=num_samples, samp_kwargs=samp_kwargs, full_MC=full_MC, rejection_func=rejection_func, ddof=ddof, output_transform=output_transform, **kwargs ) if full_output: return res elif return_cov: return (res['mean'], res['cov']) elif return_std: return (res['mean'], res['std']) else: return res['mean'] else: # Process Xstar: Xstar = scipy.atleast_2d(scipy.asarray(Xstar, dtype=float)) # Handle 1d x case where array is passed in: if self.num_dim == 1 and Xstar.shape[0] == 1: Xstar = Xstar.T if Xstar.shape[1] != self.num_dim: raise ValueError( "Second dimension of Xstar must be equal to self.num_dim! " "Shape of Xstar given is %s, num_dim is %d." % (Xstar.shape, self.num_dim) ) # Process T: if output_transform is not None: output_transform = scipy.atleast_2d(scipy.asarray(output_transform, dtype=float)) if output_transform.ndim != 2: raise ValueError( "output_transform must have exactly 2 dimensions! Shape " "of output_transform given is %s." % (output_transform.shape,) ) if output_transform.shape[1] != Xstar.shape[0]: raise ValueError( "output_transform must have the same number of columns " "the number of rows in Xstar! Shape of output_transform " "given is %s, shape of Xstar is %s." % (output_transform.shape, Xstar.shape,) ) # Process n: try: iter(n) except TypeError: n = n * scipy.ones(Xstar.shape, dtype=int) else: n = scipy.atleast_2d(scipy.asarray(n, dtype=int)) if self.num_dim == 1 and n.shape[0] == 1: n = n.T if n.shape != Xstar.shape: raise ValueError( "When using array-like n, shape must match shape of Xstar! " "Shape of n given is %s, shape of Xstar given is %s." 
% (n.shape, Xstar.shape) ) if (n < 0).any(): raise ValueError("All elements of n must be non-negative integers!") self.compute_K_L_alpha_ll() Kstar = self.compute_Kij(self.X, Xstar, self.n, n) if noise: Kstar = Kstar + self.compute_Kij(self.X, Xstar, self.n, n, noise=True) if self.T is not None: Kstar = self.T.dot(Kstar) mean = Kstar.T.dot(self.alpha) if self.mu is not None: mean_func = scipy.atleast_2d(self.mu(Xstar, n)).T mean += mean_func if output_transform is not None: mean = output_transform.dot(mean) if return_mean_func and self.mu is not None: mean_func = output_transform.dot(mean_func) mean = mean.ravel() if return_mean_func and self.mu is not None: mean_func = mean_func.ravel() if return_std or return_cov or full_output or full_MC: v = scipy.linalg.solve_triangular(self.L, Kstar, lower=True) Kstarstar = self.compute_Kij(Xstar, None, n, None) if noise: Kstarstar = Kstarstar + self.compute_Kij(Xstar, None, n, None, noise=True) covariance = Kstarstar - v.T.dot(v) if output_transform is not None: covariance = output_transform.dot(covariance.dot(output_transform.T)) if return_samples or full_MC: samps = self.draw_sample( Xstar, n=n, num_samp=num_samples, mean=mean, cov=covariance, **samp_kwargs ) if rejection_func: good_samps = [] for samp in samps.T: if rejection_func(samp): good_samps.append(samp) if len(good_samps) == 0: raise ValueError("Did not get any good samples!") samps = scipy.asarray(good_samps, dtype=float).T if full_MC: mean = scipy.mean(samps, axis=1) covariance = scipy.cov(samps, rowvar=1, ddof=ddof) std = scipy.sqrt(scipy.diagonal(covariance)) if full_output: out = { 'mean': mean, 'std': std, 'cov': covariance } if return_samples or full_MC: out['samp'] = samps if return_mean_func and self.mu is not None: out['mean_func'] = mean_func out['cov_func'] = scipy.zeros( (len(mean_func), len(mean_func)), dtype=float ) out['std_func'] = scipy.zeros_like(mean_func) out['mean_without_func'] = mean - mean_func out['cov_without_func'] = covariance out['std_without_func'] = std return out else: if return_cov: return (mean, covariance) elif return_std: return (mean, std) else: return mean else: return mean
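
# Illustrative prediction sketch: mean and standard deviation of the fit and of
# its first derivative at new points (kernel constructor assumed as above).
import numpy as np
import gptools

x = np.linspace(0.0, 1.0, 15)
y = np.sin(2.0 * np.pi * x) + 0.05 * np.random.randn(15)
k = gptools.SquaredExponentialKernel(param_bounds=[(0.0, 20.0), (0.0, 5.0)])
gp = gptools.GaussianProcess(k)
gp.add_data(x, y, err_y=0.05)
gp.optimize_hyperparameters(random_starts=2, num_proc=0)
x_star = np.linspace(0.0, 1.0, 50)
y_star, std_star = gp.predict(x_star)          # (mean, std) is the default return
dy_star, dstd_star = gp.predict(x_star, n=1)   # first derivative of the fit
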
[docs] def plot(self, X=None, n=0, ax=None, envelopes=[1, 3], base_alpha=0.375, return_prediction=False, return_std=True, full_output=False, plot_kwargs={}, **kwargs): """Plots the Gaussian process using the current hyperparameters. Only for num_dim <= 2. Parameters ---------- X : array-like (`M`,) or (`M`, `num_dim`), optional The values to evaluate the Gaussian process at. If None, then 100 points between the minimum and maximum of the data's X are used for a univariate Gaussian process and a 50x50 grid is used for a bivariate Gaussian process. Default is None (use 100 points between min and max). n : int or list, optional The order of derivative to compute. For num_dim=1, this must be an int. For num_dim=2, this must be a list of ints of length 2. Default is 0 (don't take derivative). ax : axis instance, optional Axis to plot the result on. If no axis is passed, one is created. If the string 'gca' is passed, the current axis (from plt.gca()) is used. If X_dim = 2, the axis must be 3d. envelopes: list of float, optional +/-n*sigma envelopes to plot. Default is [1, 3]. base_alpha : float, optional Alpha value to use for +/-1*sigma envelope. All other envelopes `env` are drawn with `base_alpha`/`env`. Default is 0.375. return_prediction : bool, optional If True, the predicted values are also returned. Default is False. return_std : bool, optional If True, the standard deviation is computed and returned along with the mean when `return_prediction` is True. Default is True. full_output : bool, optional Set to True to return the full outputs in a dictionary with keys: ==== ========================================================================== mean mean of GP at requested points std standard deviation of GP at requested points cov covariance matrix for values of GP at requested points samp random samples of GP at requested points (only if `return_sample` is True) ==== ========================================================================== plot_kwargs : dict, optional The entries in this dictionary are passed as kwargs to the plotting command used to plot the mean. Use this to, for instance, change the color, line width and line style. **kwargs : extra arguments for predict, optional Extra arguments that are passed to :py:meth:`predict`. Returns ------- ax : axis instance The axis instance used. mean : :py:class:`Array`, (`M`,) Predicted GP mean. Only returned if `return_prediction` is True and `full_output` is False. std : :py:class:`Array`, (`M`,) Predicted standard deviation, only returned if `return_prediction` and `return_std` are True and `full_output` is False. full_output : dict Dictionary with fields for mean, std, cov and possibly random samples. Only returned if `return_prediction` and `full_output` are True. 
""" if self.num_dim > 2: raise ValueError("Plotting is not supported for num_dim > 2!") if self.num_dim == 1: if X is None: X = scipy.linspace(self.X.min(), self.X.max(), 100) elif self.num_dim == 2: if X is None: x1 = scipy.linspace(self.X[:, 0].min(), self.X[:, 0].max(), 50) x2 = scipy.linspace(self.X[:, 1].min(), self.X[:, 1].max(), 50) X1, X2 = scipy.meshgrid(x1, x2) X1 = X1.flatten() X2 = X2.flatten() X = scipy.hstack((scipy.atleast_2d(X1).T, scipy.atleast_2d(X2).T)) else: X1 = scipy.asarray(X[:, 0]).flatten() X2 = scipy.asarray(X[:, 1]).flatten() if envelopes or (return_prediction and (return_std or full_output)): out = self.predict(X, n=n, full_output=True, **kwargs) mean = out['mean'] std = out['std'] else: mean = self.predict(X, n=n, return_std=False, **kwargs) std = None if self.num_dim == 1: univariate_envelope_plot( X, mean, std, ax=ax, base_alpha=base_alpha, envelopes=envelopes, **plot_kwargs ) elif self.num_dim == 2: if ax is None: f = plt.figure() ax = f.add_subplot(111, projection='3d') elif ax == 'gca': ax = plt.gca() if 'linewidths' not in kwargs: kwargs['linewidths'] = 0 s = ax.plot_trisurf(X1, X2, mean, **plot_kwargs) for i in envelopes: kwargs.pop('alpha', base_alpha) ax.plot_trisurf(X1, X2, mean - std, alpha=base_alpha / i, **kwargs) ax.plot_trisurf(X1, X2, mean + std, alpha=base_alpha / i, **kwargs) if return_prediction: if full_output: return (ax, out) elif return_std: return (ax, out['mean'], out['std']) else: return (ax, out['mean']) else: return ax
[docs] def draw_sample(self, Xstar, n=0, num_samp=1, rand_vars=None, rand_type='standard normal', diag_factor=1e3, method='cholesky', num_eig=None, mean=None, cov=None, modify_sign=None, **kwargs): """Draw a sample evaluated at the given points `Xstar`. Note that this function draws samples from the GP given the current values for the hyperparameters (which may be in a nonsense state if you just created the instance or called a method that performs MCMC sampling). If you want to draw random samples from MCMC output, use the `return_samples` and `full_output` keywords to :py:meth:`predict`. Parameters ---------- Xstar : array, (`M`, `D`) `M` test input values of dimension `D`. n : array, (`M`, `D`) or scalar, non-negative int, optional Derivative order to evaluate at. Default is 0 (evaluate value). noise : bool, optional Whether or not to include the noise components of the kernel in the sample. Default is False (no noise in samples). num_samp : Positive int, optional Number of samples to draw. Default is 1. Cannot be used in conjunction with `rand_vars`: If you pass both `num_samp` and `rand_vars`, `num_samp` will be silently ignored. rand_vars : array, (`M`, `P`), optional Vector of random variables :math:`u` to use in constructing the sample :math:`y_* = f_* + Lu`, where :math:`K=LL^T`. If None, values will be produced using :py:func:`numpy.random.multivariate_normal`. This allows you to use pseudo/quasi random numbers generated by an external routine. Note that, when `method` is 'eig', the eigenvalues are in *ascending* order. Default is None (use :py:func:`multivariate_normal` directly). rand_type : {'standard normal', 'uniform'}, optional Type of distribution the inputs are given with. * 'standard normal': Standard (`mu` = 0, `sigma` = 1) normal distribution (this is the default) * 'uniform': Uniform distribution on [0, 1). In this case the required Gaussian variables are produced with inversion. diag_factor : float, optional Number (times machine epsilon) added to the diagonal of the covariance matrix prior to computing its Cholesky decomposition. This is necessary as sometimes the decomposition will fail because, to machine precision, the matrix appears to not be positive definite. If you are getting errors from :py:func:`scipy.linalg.cholesky`, try increasing this an order of magnitude at a time. This parameter only has an effect when using rand_vars. Default value is 1e3. method : {'cholesky', 'eig'}, optional Method to use for constructing the matrix square root. Default is 'cholesky' (use lower-triangular Cholesky decomposition). * 'cholesky': Perform Cholesky decomposition on the covariance matrix: :math:`K=LL^T`, use :math:`L` as the matrix square root. * 'eig': Perform an eigenvalue decomposition on the covariance matrix: :math:`K=Q \\Lambda Q^{-1}`, use :math:`Q\\Lambda^{1/2}` as the matrix square root. num_eig : int or None, optional Number of eigenvalues to compute. Can range from 1 to `M` (the number of test points). If it is None, then all eigenvalues are computed. Default is None (compute all eigenvalues). This keyword only has an effect if `method` is 'eig'. mean : array, (`M`,), optional If you have pre-computed the mean and covariance matrix, then you can simply pass them in with the `mean` and `cov` keywords to save on having to call :py:meth:`predict`. cov : array, (`M`, `M`), optional If you have pre-computed the mean and covariance matrix, then you can simply pass them in with the `mean` and `cov` keywords to save on having to call :py:meth:`predict`. 
modify_sign : {None, 'left value', 'right value', 'left slope', 'right slope', 'left concavity', 'right concavity'}, optional If None (the default), the eigenvectors as returned by :py:func:`scipy.linalg.eigh` are used without modification. To modify the sign of the eigenvectors (necessary for some advanced use cases), set this kwarg to one of the following: * 'left value': forces the first value of each eigenvector to be positive. * 'right value': forces the last value of each eigenvector to be positive. * 'left slope': forces the slope to be positive at the start of each eigenvector. * 'right slope': forces the slope to be positive at the end of each eigenvector. * 'left concavity': forces the second derivative to be positive at the start of each eigenvector. * 'right concavity': forces the second derivative to be positive at the end of each eigenvector. **kwargs : optional kwargs All extra keyword arguments are passed to :py:meth:`predict` when evaluating the mean and covariance matrix of the GP. Returns ------- samples : :py:class:`Array` (`M`, `P`) or (`M`, `num_samp`) Samples evaluated at the `M` points. Raises ------ ValueError If rand_type or method is invalid. """ # All of the input processing for Xstar and n will be done in here: if mean is None or cov is None: out = self.predict(Xstar, n=n, full_output=True, **kwargs) mean = out['mean'] cov = out['cov'] if rand_vars is None and method != 'eig': try: return numpy.random.multivariate_normal(mean, cov, num_samp).T except numpy.linalg.LinAlgError as e: if self.verbose: warnings.warn( "Failure when drawing from MVN! Falling back on eig. " "Exception was:\n%s" % (e,), RuntimeWarning ) method = 'eig' if num_eig is None or num_eig > len(mean): num_eig = len(mean) elif num_eig < 1: num_eig = 1 if rand_vars is None: rand_vars = numpy.random.standard_normal((num_eig, num_samp)) valid_types = ('standard normal', 'uniform') if rand_type not in valid_types: raise ValueError( "rand_type %s not recognized! Valid options are: %s." % (rand_type, valid_types,) ) if rand_type == 'uniform': rand_vars = scipy.stats.norm.ppf(rand_vars) if method == 'cholesky': L = scipy.linalg.cholesky( cov + diag_factor * sys.float_info.epsilon * scipy.eye(cov.shape[0]), lower=True, check_finite=False ) elif method == 'eig': # TODO: Add support for specifying cutoff eigenvalue! # Not technically lower triangular, but we'll keep the name L: eig, Q = scipy.linalg.eigh( cov + diag_factor * sys.float_info.epsilon * scipy.eye(cov.shape[0]), eigvals=(len(mean) - 1 - (num_eig - 1), len(mean) - 1) ) if modify_sign is not None: if modify_sign == 'left value': modify_mask = (Q[0, :] < 0.0) elif modify_sign == 'right value': modify_mask = (Q[-1, :] < 0.0) elif modify_sign == 'left slope': modify_mask = ((Q[1, :] - Q[0, :]) < 0.0) elif modify_sign == 'right slope': modify_mask = ((Q[-1, :] - Q[-2, :]) < 0.0) elif modify_sign == 'left concavity': modify_mask = ((Q[2, :] - 2 * Q[1, :] + Q[0, :]) < 0.0) elif modify_sign == 'right concavity': modify_mask = ((Q[-1, :] - 2 * Q[-2, :] + Q[-3, :]) < 0.0) else: raise ValueError( "modify_sign %s not recognized!" % (modify_sign,) ) Q[:, modify_mask] *= -1.0 Lam_1_2 = scipy.diag(scipy.sqrt(eig)) L = Q.dot(Lam_1_2) else: raise ValueError("method %s not recognized!" % (method,)) return scipy.atleast_2d(mean).T + L.dot(rand_vars[:num_eig, :])
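
# Illustrative sampling sketch for draw_sample: draw a few curves consistent
# with the current hyperparameters (kernel constructor assumed as above).
import numpy as np
import gptools

x = np.linspace(0.0, 1.0, 15)
y = np.sin(2.0 * np.pi * x) + 0.05 * np.random.randn(15)
k = gptools.SquaredExponentialKernel(param_bounds=[(0.0, 20.0), (0.0, 5.0)])
gp = gptools.GaussianProcess(k)
gp.add_data(x, y, err_y=0.05)
gp.optimize_hyperparameters(random_starts=2, num_proc=0)
x_star = np.linspace(0.0, 1.0, 100)
samps = gp.draw_sample(x_star, num_samp=5)   # shape (100, 5): one column per sample
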
[docs] def update_hyperparameters(self, new_params, hyper_deriv_handling='default', exit_on_bounds=True, inf_on_error=True): r"""Update the kernel's hyperparameters to the new parameters. This will call :py:meth:`compute_K_L_alpha_ll` to update the state accordingly. Note that if this method crashes and the `hyper_deriv_handling` keyword was used, it may leave :py:attr:`use_hyper_deriv` in the wrong state. Parameters ---------- new_params : :py:class:`Array` or other Array-like, length dictated by kernel New parameters to use. hyper_deriv_handling : {'default', 'value', 'deriv'}, optional Determines what to compute and return. If 'default' and :py:attr:`use_hyper_deriv` is True then the negative log-posterior and the negative gradient of the log-posterior with respect to the hyperparameters is returned. If 'default' and :py:attr:`use_hyper_deriv` is False or 'value' then only the negative log-posterior is returned. If 'deriv' then only the negative gradient of the log-posterior with respect to the hyperparameters is returned. exit_on_bounds : bool, optional If True, the method will automatically exit if the hyperparameters are impossible given the hyperprior, without trying to update the internal state. This is useful during MCMC sampling and optimization. Default is True (don't perform update for impossible hyperparameters). inf_on_error : bool, optional If True, the method will return `scipy.inf` if the hyperparameters produce a linear algebra error upon trying to update the Gaussian process. Default is True (catch errors and return infinity). Returns ------- -1*ll : float The updated log posterior. -1*ll_deriv : array of float, (`num_params`,) The gradient of the log posterior. Only returned if :py:attr:`use_hyper_deriv` is True or `hyper_deriv_handling` is set to 'deriv'. """ use_hyper_deriv = self.use_hyper_deriv if hyper_deriv_handling == 'value': self.use_hyper_deriv = False elif hyper_deriv_handling == 'deriv': self.use_hyper_deriv = True self.k.set_hyperparams(new_params[:len(self.k.free_params)]) self.noise_k.set_hyperparams( new_params[len(self.k.free_params):len(self.k.free_params) + len(self.noise_k.free_params)] ) if self.mu is not None: self.mu.set_hyperparams( new_params[len(self.k.free_params) + len(self.noise_k.free_params):] ) self.K_up_to_date = False try: if exit_on_bounds: if scipy.isinf(self.hyperprior(self.params)): raise GPImpossibleParamsError("Impossible values for params!") self.compute_K_L_alpha_ll() except Exception as e: if inf_on_error: if not isinstance(e, GPImpossibleParamsError) and self.verbose: warnings.warn( "Unhandled exception when updating GP! Exception was:\n%s\n" "State of params is: %s" % (traceback.format_exc(), str(self.free_params[:])) ) self.use_hyper_deriv = use_hyper_deriv if use_hyper_deriv and hyper_deriv_handling == 'default': return (scipy.inf, scipy.zeros(len(self.free_params))) elif hyper_deriv_handling == 'deriv': return scipy.zeros(len(self.free_params)) else: return scipy.inf else: self.use_hyper_deriv = use_hyper_deriv raise e self.use_hyper_deriv = use_hyper_deriv if use_hyper_deriv and hyper_deriv_handling == 'default': return (-1.0 * self.ll, -1.0 * self.ll_deriv) elif hyper_deriv_handling == 'deriv': return -1.0 * self.ll_deriv else: return -1.0 * self.ll
[docs] def compute_K_L_alpha_ll(self): r"""Compute `K`, `L`, `alpha` and log-likelihood according to the first part of Algorithm 2.1 in R&W. Computes `K` and the noise portion of `K` using :py:meth:`compute_Kij`, computes `L` using :py:func:`scipy.linalg.cholesky`, then computes `alpha` as `L.T\\(L\\y)`. Only does the computation if :py:attr:`K_up_to_date` is False -- otherwise leaves the existing values. """ if not self.K_up_to_date: y = self.y err_y = self.err_y self.K = self.compute_Kij(self.X, None, self.n, None, noise=False) # If the noise kernel is meant to be strictly diagonal, it should # yield a diagonal noise_K: if isinstance(self.noise_k, ZeroKernel): self.noise_K = scipy.zeros((self.X.shape[0], self.X.shape[0])) elif isinstance(self.noise_k, DiagonalNoiseKernel): self.noise_K = self.noise_k.params[0]**2.0 * scipy.eye(self.X.shape[0]) else: self.noise_K = self.compute_Kij(self.X, None, self.n, None, noise=True) K = self.K noise_K = self.noise_K if self.T is not None: KnK = self.T.dot(K + noise_K).dot(self.T.T) else: KnK = K + noise_K K_tot = ( KnK + scipy.diag(err_y**2.0) + self.diag_factor * sys.float_info.epsilon * scipy.eye(len(y)) ) self.L = scipy.linalg.cholesky(K_tot, lower=True) # Need to make the mean-subtracted y that appears in the expression # for alpha: if self.mu is not None: mu_alph = self.mu(self.X, self.n) if self.T is not None: mu_alph = self.T.dot(mu_alph) y_alph = self.y - mu_alph else: y_alph = self.y self.alpha = scipy.linalg.cho_solve((self.L, True), scipy.atleast_2d(y_alph).T) self.ll = ( -0.5 * scipy.atleast_2d(y_alph).dot(self.alpha) - scipy.log(scipy.diag(self.L)).sum() - 0.5 * len(y) * scipy.log(2.0 * scipy.pi) )[0, 0] # Apply hyperpriors: self.ll += self.hyperprior(self.params) if self.use_hyper_deriv: warnings.warn("Use of hyperparameter derivatives is experimental!") # Only compute for the free parameters, since that is what we # want to optimize: self.ll_deriv = scipy.zeros(len(self.free_params)) # Combine the kernel and noise kernel so we only need one loop: if isinstance(self.noise_k, ZeroKernel): knk = self.k elif isinstance(self.noise_k, DiagonalNoiseKernel): knk = self.k # Handle DiagonalNoiseKernel specially: if not self.noise_k.fixed_params[0]: dK_dtheta_i = 2.0 * self.noise_k.params[0] * scipy.eye(len(y)) self.ll_deriv[len(self.k.free_params)] = 0.5 * ( self.alpha.T.dot(dK_dtheta_i.dot(self.alpha)) - scipy.trace(scipy.linalg.cho_solve((self.L, True), dK_dtheta_i)) ) else: knk = self.k + self.noise_k # Get the indices of the free params in knk.params: free_param_idxs = scipy.arange(0, len(knk.params), dtype=int)[~knk.fixed_params] # Handle the kernel and noise kernel: for i, pi in enumerate(free_param_idxs): dK_dtheta_i = self.compute_Kij( self.X, None, self.n, None, k=knk, hyper_deriv=pi ) if self.T is not None: dK_dtheta_i = self.T.dot(dK_dtheta_i).dot(self.T.T) self.ll_deriv[i] = 0.5 * ( self.alpha.T.dot(dK_dtheta_i.dot(self.alpha)) - scipy.trace(scipy.linalg.cho_solve((self.L, True), dK_dtheta_i)) ) # Handle the mean function: if self.mu is not None: # Get the indices of the free params in self.mu.params: free_param_idxs = scipy.arange(0, len(self.mu.params), dtype=int)[~self.mu.fixed_params] for i, pi in enumerate(free_param_idxs): dmu_dtheta_i = scipy.atleast_2d(self.mu(self.X, self.n, hyper_deriv=pi)).T if self.T is not None: dmu_dtheta_i = self.T.dot(dmu_dtheta_i) self.ll_deriv[i + len(knk.free_params)] = dmu_dtheta_i.T.dot(self.alpha) # Handle the hyperprior: # Get the indices of the free params in self.params: free_param_idxs = 
scipy.arange(0, len(self.params), dtype=int)[~self.fixed_params] for i, pi in enumerate(free_param_idxs): self.ll_deriv[i] += self.hyperprior(self.params, hyper_deriv=pi) self.K_up_to_date = True
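# Minimal standalone sketch of the Algorithm 2.1 computation performed above,
# written with plain numpy/scipy arrays (the toy covariance, targets and noise
# levels are arbitrary illustrative values):
import numpy as np
import scipy.linalg

K = np.array([[1.0, 0.5], [0.5, 1.0]])   # noise-free covariance of the targets
y = np.array([0.3, -0.1])                # mean-subtracted target values
err_y = np.array([0.1, 0.1])             # per-point observation noise

K_tot = K + np.diag(err_y ** 2)
L = scipy.linalg.cholesky(K_tot, lower=True)
alpha = scipy.linalg.cho_solve((L, True), y[:, None])
ll = (
    -0.5 * y[None, :].dot(alpha)
    - np.log(np.diag(L)).sum()
    - 0.5 * len(y) * np.log(2.0 * np.pi)
)[0, 0]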
    @property
    def num_dim(self):
        """The number of dimensions of the input data.

        Returns
        -------
        num_dim: int
            The number of dimensions of the input data as defined in the kernel.
        """
        return self.k.num_dim
[docs]    def compute_Kij(self, Xi, Xj, ni, nj, noise=False, hyper_deriv=None, k=None):
        r"""Compute covariance matrix between datasets `Xi` and `Xj`.

        Specify the orders of derivatives at each location with the `ni`, `nj`
        arrays. The `noise` flag selects whether the noise kernel or the regular
        kernel is used (i.e., for evaluation of :math:`K+\sigma I` versus
        :math:`K_*`). If `Xj` is None, the symmetric matrix :math:`K(X, X)` is
        formed.

        Note that type and dimension checking is NOT performed, as it is assumed
        the data are from inside the instance and have hence been sanitized by
        :py:meth:`add_data`.

        Parameters
        ----------
        Xi : array, (`M`, `D`)
            `M` input values of dimension `D`.
        Xj : array, (`P`, `D`)
            `P` input values of dimension `D`.
        ni : array, (`M`, `D`), non-negative integers
            `M` derivative orders with respect to the `Xi` coordinates.
        nj : array, (`P`, `D`), non-negative integers
            `P` derivative orders with respect to the `Xj` coordinates.
        noise : bool, optional
            If True, uses the noise kernel, otherwise uses the regular kernel.
            Default is False (use regular kernel).
        hyper_deriv : None or non-negative int, optional
            Index of the hyperparameter to compute the first derivative with
            respect to. If None, no derivatives are taken. Default is None (no
            hyperparameter derivatives).
        k : :py:class:`~gptools.kernel.core.Kernel` instance, optional
            The covariance kernel to use. Overrides `noise` if present.

        Returns
        -------
        Kij : array, (`M`, `P`)
            Covariance matrix between `Xi` and `Xj`.
        """
        if k is None:
            if not noise:
                k = self.k
            else:
                k = self.noise_k

        if Xj is None:
            symmetric = True
            Xj = Xi
            nj = ni
        else:
            symmetric = False

        # TODO: This technically doesn't take advantage of the symmetric case.
        # Might be worth trying to do that at some point, but this is vastly
        # superior to the double for loop implementation for which using
        # symmetry is easy.
        Xi_tile = scipy.repeat(Xi, Xj.shape[0], axis=0)
        ni_tile = scipy.repeat(ni, Xj.shape[0], axis=0)
        Xj_tile = scipy.tile(Xj, (Xi.shape[0], 1))
        nj_tile = scipy.tile(nj, (Xi.shape[0], 1))
        Kij = k(
            Xi_tile, Xj_tile,
            ni_tile, nj_tile,
            hyper_deriv=hyper_deriv,
            symmetric=symmetric
        )
        Kij = scipy.reshape(Kij, (Xi.shape[0], -1))

        return Kij
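# Small standalone illustration of the repeat/tile pairing used above to
# vectorize the kernel evaluation over all (i, j) combinations (toy inputs):
import numpy as np
Xi = np.array([[1.0], [2.0], [3.0]])            # M = 3 points
Xj = np.array([[10.0], [20.0]])                 # P = 2 points
Xi_tile = np.repeat(Xi, Xj.shape[0], axis=0)    # rows: 1, 1, 2, 2, 3, 3
Xj_tile = np.tile(Xj, (Xi.shape[0], 1))         # rows: 10, 20, 10, 20, 10, 20
# A single vectorized kernel call k(Xi_tile, Xj_tile, ...) therefore returns
# the M*P covariances in row-major order, and reshape((M, -1)) recovers the
# (M, P) matrix K(Xi, Xj).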
[docs] def compute_ll_matrix(self, bounds, num_pts): """Compute the log likelihood over the (free) parameter space. Parameters ---------- bounds : 2-tuple or list of 2-tuples with length equal to the number of free parameters Bounds on the range to use for each of the parameters. If a single 2-tuple is given, it will be used for each of the parameters. num_pts : int or list of ints with length equal to the number of free parameters If a single int is given, it will be used for each of the parameters. Returns ------- ll_vals : :py:class:`Array` The log likelihood for each of the parameter possibilities. param_vals : List of :py:class:`Array` The parameter values used. """ present_free_params = self.free_params[:] bounds = scipy.atleast_2d(scipy.asarray(bounds, dtype=float)) if bounds.shape[1] != 2: raise ValueError("Argument bounds must have shape (n, 2)!") # If bounds is a single tuple, repeat it for each free parameter: if bounds.shape[0] == 1: bounds = scipy.tile(bounds, (len(present_free_params), 1)) # If num_pts is a single value, use it for all of the parameters: try: iter(num_pts) except TypeError: num_pts = num_pts * scipy.ones(bounds.shape[0], dtype=int) else: num_pts = scipy.asarray(num_pts, dtype=int) if len(num_pts) != len(present_free_params): raise ValueError( "Length of num_pts must match the number of free parameters!" ) # Form arrays to evaluate parameters over: param_vals = [] for k in xrange(0, len(present_free_params)): param_vals.append(scipy.linspace(bounds[k, 0], bounds[k, 1], num_pts[k])) ll_vals = self._compute_ll_matrix(0, param_vals, num_pts) # Reset the parameters to what they were before: self.update_hyperparameters(scipy.asarray(present_free_params, dtype=float)) return (ll_vals, param_vals)
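# Sketch of a brute-force scan of the log likelihood surface, reusing the
# two-hyperparameter `gp` constructed in the earlier sketch (the bounds and
# number of grid points are arbitrary):
ll_vals, param_vals = gp.compute_ll_matrix([(0.1, 5.0), (0.05, 2.0)], 50)
# ll_vals has shape (50, 50) here, and param_vals holds the two grid vectors,
# so the surface can be inspected with, e.g., matplotlib's contourf.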
def _compute_ll_matrix(self, idx, param_vals, num_pts): """Recursive helper function for compute_ll_matrix. Parameters ---------- idx : int The index of the parameter for this layer of the recursion to work on. `idx` == len(`num_pts`) is the base case that terminates the recursion. param_vals : List of :py:class:`Array` List of arrays of parameter values. Entries in the slots 0:`idx` are set to scalars by the previous levels of recursion. num_pts : :py:class:`Array` The numbers of points for each parameter. Returns ------- vals : :py:class:`Array` The log likelihood for each of the parameter possibilities at lower levels. """ if idx >= len(num_pts): # Base case: All entries in param_vals should be scalars: return -1.0 * self.update_hyperparameters( scipy.asarray(param_vals, dtype=float) ) else: # Recursive case: call _compute_ll_matrix for each entry in param_vals[idx]: vals = scipy.zeros(num_pts[idx:], dtype=float) for k in xrange(0, len(param_vals[idx])): specific_param_vals = list(param_vals) specific_param_vals[idx] = param_vals[idx][k] vals[k] = self._compute_ll_matrix( idx + 1, specific_param_vals, num_pts ) return vals
[docs] def sample_hyperparameter_posterior(self, nwalkers=200, nsamp=500, burn=0, thin=1, num_proc=None, sampler=None, plot_posterior=False, plot_chains=False, sampler_type='ensemble', ntemps=20, sampler_a=2.0, **plot_kwargs): """Produce samples from the posterior for the hyperparameters using MCMC. Returns the sampler created, because storing it stops the GP from being pickleable. To add more samples to a previous sampler, pass the sampler instance in the `sampler` keyword. Parameters ---------- nwalkers : int, optional The number of walkers to use in the sampler. Should be on the order of several hundred. Default is 200. nsamp : int, optional Number of samples (per walker) to take. Default is 500. burn : int, optional This keyword only has an effect on the corner plot produced when `plot_posterior` is True and the flattened chain plot produced when `plot_chains` is True. To perform computations with burn-in, see :py:meth:`compute_from_MCMC`. The number of samples to discard at the beginning of the chain. Default is 0. thin : int, optional This keyword only has an effect on the corner plot produced when `plot_posterior` is True and the flattened chain plot produced when `plot_chains` is True. To perform computations with thinning, see :py:meth:`compute_from_MCMC`. Every `thin`-th sample is kept. Default is 1. num_proc : int or None, optional Number of processors to use. If None, all available processors are used. Default is None (use all available processors). sampler : :py:class:`Sampler` instance The sampler to use. If the sampler already has samples, the most recent sample will be used as the starting point. Otherwise a random sample from the hyperprior will be used. plot_posterior : bool, optional If True, a corner plot of the posterior for the hyperparameters will be generated. Default is False. plot_chains : bool, optional If True, a plot showing the history and autocorrelation of the chains will be produced. sampler_type : str, optional The type of sampler to use. Valid options are "ensemble" (affine- invariant ensemble sampler) and "pt" (parallel-tempered ensemble sampler). ntemps : int, optional Number of temperatures to use with the parallel-tempered ensemble sampler. sampler_a : float, optional Scale of the proposal distribution. plot_kwargs : additional keywords, optional Extra arguments to pass to :py:func:`~gptools.utils.plot_sampler`. """ if num_proc is None: num_proc = multiprocessing.cpu_count() # Needed for emcee to do it right: if num_proc == 0: num_proc = 1 ndim = len(self.free_params) if sampler is None: if sampler_type == 'ensemble': sampler = emcee.EnsembleSampler( nwalkers, ndim, _ComputeLnProbEval(self), threads=num_proc, a=sampler_a ) elif sampler_type == 'pt': # TODO: Finish this! raise NotImplementedError("PTSampler not done yet!") sampler = emcee.PTSampler( ntemps, nwalkers, ndim, logl, logp ) else: raise NotImplementedError( "Sampler type %s not supported!" 
% (sampler_type,) ) else: sampler.a = sampler_a if sampler.chain.size == 0: theta0 = self.hyperprior.random_draw(size=nwalkers).T theta0 = theta0[:, ~self.fixed_params] else: # Start from the stopping point of the previous chain: theta0 = sampler.chain[:, -1, :] sampler.run_mcmc(theta0, nsamp) if plot_posterior or plot_chains: flat_trace = sampler.chain[:, burn::thin, :] flat_trace = flat_trace.reshape((-1, flat_trace.shape[2])) if plot_posterior and plot_chains: plot_sampler( sampler, labels=['$%s$' % (l,) for l in self.free_param_names], burn=burn, **plot_kwargs ) else: if plot_posterior: triangle.corner( flat_trace, plot_datapoints=False, labels=['$%s$' % (l,) for l in self.free_param_names] ) if plot_chains: f = plt.figure() for k in xrange(0, ndim): # a = f.add_subplot(3, ndim, k + 1) # a.acorr( # sampler.flatchain[:, k], # maxlags=100, # detrend=plt.mlab.detrend_mean # ) # a.set_xlabel('lag') # a.set_title('$%s$ autocorrelation' % (self.free_param_names[k],)) a = f.add_subplot(ndim, 1, 0 * ndim + k + 1) for chain in sampler.chain[:, :, k]: a.plot(chain) a.set_xlabel('sample') a.set_ylabel('$%s$' % (self.free_param_names[k],)) a.set_title('$%s$ all chains' % (self.free_param_names[k],)) a.axvline(burn, color='r', linewidth=3, ls='--') # a = f.add_subplot(2, ndim, 1 * ndim + k + 1) # a.plot(flat_trace[:, k]) # a.set_xlabel('sample') # a.set_ylabel('$%s$' % (self.free_param_names[k],)) # a.set_title('$%s$ flattened, burned and thinned chain' % (self.free_param_names[k],)) # Print a summary of the sampler: print("MCMC parameter summary:") print("param\tmean\t95% posterior interval") mean, ci_l, ci_u = summarize_sampler(sampler, burn=burn) names = self.free_param_names[:] for n, m, l, u in zip(names, mean, ci_l, ci_u): print("%s\t%4.4g\t[%4.4g, %4.4g]" % (n, m, l, u)) return sampler
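# Sketch of MCMC exploration of the hyperparameter posterior, continuing from
# the `gp` above (requires emcee; the walker and sample counts are arbitrary,
# and the hyperprior must be proper so random starting positions can be drawn):
sampler = gp.sample_hyperparameter_posterior(nwalkers=200, nsamp=500)
# The sampler is returned rather than stored so the GP stays picklable; keep it
# around to add more samples later or to post-process the chain yourself:
flat_chain = sampler.chain[:, 100::10, :].reshape((-1, len(gp.free_params)))  # burn 100, thin 10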
[docs] def compute_from_MCMC(self, X, n=0, return_mean=True, return_std=True, return_cov=False, return_samples=False, return_mean_func=False, num_samples=1, noise=False, samp_kwargs={}, sampler=None, flat_trace=None, burn=0, thin=1, **kwargs): """Compute desired quantities from MCMC samples of the hyperparameter posterior. The return will be a list with a number of rows equal to the number of hyperparameter samples. The columns depend on the state of the boolean flags, but will be some subset of (mean, stddev, cov, samples), in that order. Samples will be the raw output of :py:meth:`draw_sample`, so you will need to remember to convert to an array and flatten if you want to work with a single sample. Parameters ---------- X : array-like (`M`,) or (`M`, `num_dim`) The values to evaluate the Gaussian process at. n : non-negative int or list, optional The order of derivative to compute. For num_dim=1, this must be an int. For num_dim=2, this must be a list of ints of length 2. Default is 0 (don't take derivative). return_mean : bool, optional If True, the mean will be computed at each hyperparameter sample. Default is True (compute mean). return_std : bool, optional If True, the standard deviation will be computed at each hyperparameter sample. Default is True (compute stddev). return_cov : bool, optional If True, the covariance matrix will be computed at each hyperparameter sample. Default is True (compute stddev). return_samples : bool, optional If True, random sample(s) will be computed at each hyperparameter sample. Default is False (do not compute samples). num_samples : int, optional Compute this many samples if `return_sample` is True. Default is 1. noise : bool, optional If True, noise is included in the predictions and samples. Default is False (do not include noise). samp_kwargs : dict, optional If `return_sample` is True, the contents of this dictionary will be passed as kwargs to :py:meth:`draw_sample`. sampler : :py:class:`Sampler` instance or None, optional :py:class:`Sampler` instance that has already been run to the extent desired on the hyperparameter posterior. If None, a new sampler will be created with :py:meth:`sample_hyperparameter_posterior`. In this case, all extra kwargs will be passed on, allowing you to set the number of samples, etc. Default is None (create sampler). flat_trace : array-like (`nsamp`, `ndim`) or None, optional Flattened trace with samples of the free hyperparameters. If present, overrides `sampler`. This allows you to use a sampler other than the ones from :py:mod:`emcee`, or to specify arbitrary values you wish to evaluate the curve at. Note that this WILL be thinned and burned according to the following two kwargs. "Flat" refers to the fact that you must have combined all chains into a single one. Default is None (use `sampler`). burn : int, optional The number of samples to discard at the beginning of the chain. Default is 0. thin : int, optional Every `thin`-th sample is kept. Default is 1. num_proc : int, optional The number of processors to use for evaluation. This is used both when calling the sampler and when evaluating the Gaussian process. If None, the number of available processors will be used. If zero, evaluation will proceed in parallel. Default is to use all available processors. **kwargs : extra optional kwargs All additional kwargs are passed to :py:meth:`sample_hyperparameter_posterior`. Returns ------- out : dict A dictionary having some or all of the fields 'mean', 'std', 'cov' and 'samp'. Each entry is a list of array-like. 
The length of this list is equal to the number of hyperparameter samples used, and the entries have the following shapes: ==== ==================== mean (`M`,) std (`M`,) cov (`M`, `M`) samp (`M`, `num_samples`) ==== ==================== """ output_transform = kwargs.pop('output_transform', None) if flat_trace is None: if sampler is None: sampler = self.sample_hyperparameter_posterior(burn=burn, **kwargs) # If we create the sampler, we need to make sure we clean up its pool: try: sampler.pool.close() except AttributeError: # This will occur if only one thread is used. pass flat_trace = sampler.chain[:, burn::thin, :] flat_trace = flat_trace.reshape((-1, flat_trace.shape[2])) else: flat_trace = flat_trace[burn::thin, :] num_proc = kwargs.get('num_proc', multiprocessing.cpu_count()) if num_proc > 1: pool = InterruptiblePool(processes=num_proc) map_fun = pool.map else: map_fun = map try: res = map_fun( _ComputeGPWrapper( self, X, n, return_mean, return_std, return_cov, return_samples, return_mean_func, num_samples, noise, samp_kwargs, output_transform ), flat_trace ) finally: if num_proc > 1: pool.close() out = dict() if return_mean: out['mean'] = [r['mean'] for r in res if r is not None] if return_std: out['std'] = [r['std'] for r in res if r is not None] if return_cov: out['cov'] = [r['cov'] for r in res if r is not None] if return_samples: out['samp'] = [r['samp'] for r in res if r is not None] if return_mean_func and self.mu is not None: out['mean_func'] = [r['mean_func'] for r in res if r is not None] out['cov_func'] = [r['cov_func'] for r in res if r is not None] out['std_func'] = [r['std_func'] for r in res if r is not None] out['mean_without_func'] = [r['mean_without_func'] for r in res if r is not None] out['cov_without_func'] = [r['cov_without_func'] for r in res if r is not None] out['std_without_func'] = [r['std_without_func'] for r in res if r is not None] return out
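# Sketch: per-sample predictions marginalized over the chain drawn above
# (the burn and thin values are arbitrary):
X_star = np.linspace(0.0, 1.0, 100)
res = gp.compute_from_MCMC(
    X_star, sampler=sampler, burn=100, thin=10,
    return_mean=True, return_std=True
)
means = np.asarray(res['mean'])   # one row per retained hyperparameter sample
stds = np.asarray(res['std'])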
[docs] def compute_l_from_MCMC(self, X, n=0, sampler=None, flat_trace=None, burn=0, thin=1, **kwargs): """Compute desired quantities from MCMC samples of the hyperparameter posterior. The return will be a list with a number of rows equal to the number of hyperparameter samples. The columns will contain the covariance length scale function. Parameters ---------- X : array-like (`M`,) or (`M`, `num_dim`) The values to evaluate the Gaussian process at. n : non-negative int or list, optional The order of derivative to compute. For num_dim=1, this must be an int. For num_dim=2, this must be a list of ints of length 2. Default is 0 (don't take derivative). sampler : :py:class:`Sampler` instance or None, optional :py:class:`Sampler` instance that has already been run to the extent desired on the hyperparameter posterior. If None, a new sampler will be created with :py:meth:`sample_hyperparameter_posterior`. In this case, all extra kwargs will be passed on, allowing you to set the number of samples, etc. Default is None (create sampler). flat_trace : array-like (`nsamp`, `ndim`) or None, optional Flattened trace with samples of the free hyperparameters. If present, overrides `sampler`. This allows you to use a sampler other than the ones from :py:mod:`emcee`, or to specify arbitrary values you wish to evaluate the curve at. Note that this WILL be thinned and burned according to the following two kwargs. "Flat" refers to the fact that you must have combined all chains into a single one. Default is None (use `sampler`). burn : int, optional The number of samples to discard at the beginning of the chain. Default is 0. thin : int, optional Every `thin`-th sample is kept. Default is 1. num_proc : int, optional The number of processors to use for evaluation. This is used both when calling the sampler and when evaluating the Gaussian process. If None, the number of available processors will be used. If zero, evaluation will proceed in parallel. Default is to use all available processors. **kwargs : extra optional kwargs All additional kwargs are passed to :py:meth:`sample_hyperparameter_posterior`. Returns ------- out : array of float Length scale function at the indicated points. """ if flat_trace is None: if sampler is None: sampler = self.sample_hyperparameter_posterior(burn=burn, **kwargs) # If we create the sampler, we need to make sure we clean up # its pool: try: sampler.pool.close() except AttributeError: # This will occur if only one thread is used. pass flat_trace = sampler.chain[:, burn::thin, :] flat_trace = flat_trace.reshape((-1, flat_trace.shape[2])) else: flat_trace = flat_trace[burn::thin, :] num_proc = kwargs.get('num_proc', multiprocessing.cpu_count()) if num_proc > 1: pool = InterruptiblePool(processes=num_proc) try: res = pool.map(_ComputeLWrapper(self, X, n), flat_trace) finally: pool.close() else: res = map(_ComputeLWrapper(self, X, n), flat_trace) return res
[docs] def compute_w_from_MCMC(self, X, n=0, sampler=None, flat_trace=None, burn=0, thin=1, **kwargs): """Compute desired quantities from MCMC samples of the hyperparameter posterior. The return will be a list with a number of rows equal to the number of hyperparameter samples. The columns will contain the warping function. Parameters ---------- X : array-like (`M`,) or (`M`, `num_dim`) The values to evaluate the Gaussian process at. n : non-negative int or list, optional The order of derivative to compute. For num_dim=1, this must be an int. For num_dim=2, this must be a list of ints of length 2. Default is 0 (don't take derivative). sampler : :py:class:`Sampler` instance or None, optional :py:class:`Sampler` instance that has already been run to the extent desired on the hyperparameter posterior. If None, a new sampler will be created with :py:meth:`sample_hyperparameter_posterior`. In this case, all extra kwargs will be passed on, allowing you to set the number of samples, etc. Default is None (create sampler). flat_trace : array-like (`nsamp`, `ndim`) or None, optional Flattened trace with samples of the free hyperparameters. If present, overrides `sampler`. This allows you to use a sampler other than the ones from :py:mod:`emcee`, or to specify arbitrary values you wish to evaluate the curve at. Note that this WILL be thinned and burned according to the following two kwargs. "Flat" refers to the fact that you must have combined all chains into a single one. Default is None (use `sampler`). burn : int, optional The number of samples to discard at the beginning of the chain. Default is 0. thin : int, optional Every `thin`-th sample is kept. Default is 1. num_proc : int, optional The number of processors to use for evaluation. This is used both when calling the sampler and when evaluating the Gaussian process. If None, the number of available processors will be used. If zero, evaluation will proceed in parallel. Default is to use all available processors. **kwargs : extra optional kwargs All additional kwargs are passed to :py:meth:`sample_hyperparameter_posterior`. Returns ------- out : array of float Length scale function at the indicated points. """ if flat_trace is None: if sampler is None: sampler = self.sample_hyperparameter_posterior(burn=burn, **kwargs) # If we create the sampler, we need to make sure we clean up # its pool: try: sampler.pool.close() except AttributeError: # This will occur if only one thread is used. pass flat_trace = sampler.chain[:, burn::thin, :] flat_trace = flat_trace.reshape((-1, flat_trace.shape[2])) else: flat_trace = flat_trace[burn::thin, :] num_proc = kwargs.get('num_proc', multiprocessing.cpu_count()) if num_proc > 1: pool = InterruptiblePool(processes=num_proc) try: res = pool.map(_ComputeWWrapper(self, X, n), flat_trace) finally: pool.close() else: res = map(_ComputeWWrapper(self, X, n), flat_trace) return res
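# Sketch for compute_l_from_MCMC / compute_w_from_MCMC.  These are only
# meaningful for covariance kernels that expose a length scale function
# (l_func) or an input warping (w_func), such as the Gibbs and warped kernels
# in gptools.kernel; whether the kernel attached to `gp` provides them is an
# assumption here:
l_vals = gp.compute_l_from_MCMC(X_star, sampler=sampler, burn=100, thin=10)
w_vals = gp.compute_w_from_MCMC(X_star, sampler=sampler, burn=100, thin=10)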
[docs] def predict_MCMC(self, X, ddof=1, full_MC=False, rejection_func=None, **kwargs): """Make a prediction using MCMC samples. This is essentially a convenient wrapper of :py:meth:`compute_from_MCMC`, designed to act more or less interchangeably with :py:meth:`predict`. Computes the mean of the GP posterior marginalized over the hyperparameters using iterated expectations. If `return_std` is True, uses the law of total variance to compute the variance of the GP posterior marginalized over the hyperparameters. If `return_cov` is True, uses the law of total covariance to compute the entire covariance of the GP posterior marginalized over the hyperparameters. If both `return_cov` and `return_std` are True, then both the covariance matrix and standard deviation array will be returned. Parameters ---------- X : array-like (`M`,) or (`M`, `num_dim`) The values to evaluate the Gaussian process at. ddof : int, optional The degree of freedom correction to use when computing the variance. Default is 1 (standard Bessel correction for unbiased estimate). return_std : bool, optional If True, the standard deviation is also computed. Default is True. full_MC : bool, optional Set to True to compute the mean and covariance matrix using Monte Carlo sampling of the posterior. The samples will also be returned if full_output is True. Default is False (don't use full sampling). rejection_func : callable, optional Any samples where this function evaluates False will be rejected, where it evaluates True they will be kept. Default is None (no rejection). Only has an effect if `full_MC` is True. ddof : int, optional **kwargs : optional kwargs All additional kwargs are passed directly to :py:meth:`compute_from_MCMC`. """ return_std = kwargs.get('return_std', True) return_cov = kwargs.get('return_cov', False) if full_MC: kwargs['return_mean'] = False kwargs['return_std'] = False kwargs['return_cov'] = False kwargs['return_samples'] = True else: kwargs['return_mean'] = True return_samples = kwargs.get('return_samples', True) res = self.compute_from_MCMC(X, **kwargs) out = {} if return_samples: samps = scipy.asarray(scipy.hstack(res['samp'])) if full_MC: if rejection_func: good_samps = [] for samp in samps.T: if rejection_func(samp): good_samps.append(samp) if len(good_samps) == 0: raise ValueError("Did not get any good samples!") samps = scipy.asarray(good_samps, dtype=float).T mean = scipy.mean(samps, axis=1) cov = scipy.cov(samps, rowvar=1, ddof=ddof) std = scipy.sqrt(scipy.diagonal(cov)) else: means = scipy.asarray(res['mean']) mean = scipy.mean(means, axis=0) # TODO: Allow use of robust estimators! 
if 'cov' in res: covs = scipy.asarray(res['cov']) cov = scipy.mean(covs, axis=0) + scipy.cov(means, rowvar=0, ddof=ddof) std = scipy.sqrt(scipy.diagonal(cov)) elif 'std' in res: vars_ = scipy.asarray(scipy.asarray(res['std']))**2 std = scipy.sqrt(scipy.mean(vars_, axis=0) + scipy.var(means, axis=0, ddof=ddof)) if 'mean_func' in res: mean_funcs = scipy.asarray(res['mean_func']) cov_funcs = scipy.asarray(res['cov_func']) mean_func = scipy.mean(mean_funcs, axis=0) cov_func = scipy.mean(cov_funcs, axis=0) + scipy.cov(mean_funcs, rowvar=0, ddof=ddof) std_func = scipy.sqrt(scipy.diagonal(cov_func)) mean_without_funcs = scipy.asarray(res['mean_without_func']) cov_without_funcs = scipy.asarray(res['cov_without_func']) mean_without_func = scipy.mean(mean_without_funcs, axis=0) cov_without_func = ( scipy.mean(cov_without_funcs, axis=0) + scipy.cov(mean_without_funcs, rowvar=0, ddof=ddof) ) std_without_func = scipy.sqrt(scipy.diagonal(cov_without_func)) out['mean_func'] = mean_func out['cov_func'] = cov_func out['std_func'] = std_func out['mean_without_func'] = mean_without_func out['cov_without_func'] = cov_without_func out['std_without_func'] = std_without_func out['mean'] = mean if return_samples: out['samp'] = samps if return_std or return_cov: out['std'] = std if return_cov: out['cov'] = cov return out
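# Sketch: the single-call version of the above.  predict_MCMC combines the
# per-sample means and (co)variances through the laws of total expectation and
# total variance, reusing the sampler drawn earlier:
out = gp.predict_MCMC(X_star, sampler=sampler, burn=100, thin=10)
mean, std = out['mean'], out['std']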
class _ComputeGPWrapper(object): """Wrapper to allow parallel evaluation of means, covariances and random draws. Parameters ---------- gp : :py:class:`GaussianProcess` instance The :py:class:`GaussianProcess` to wrap. X : array-like The evaluation locations to use. No pre-processing is performed: `X` will be passed directly to :py:meth:`predict` and/or :py:meth:`draw_sample`. n : int or array-like The derivative orders to use. No pre-processing is performed: `n` will be passed directly to :py:meth:`predict` and/or :py:meth:`draw_sample`. return_mean : bool If True, the mean will be computed. return_std : bool If True, the standard deviation will be computed. return_cov : bool If True, the covariance matrix will be computed. return_sample : bool If True, random sample(s) will be computed. num_samples : int If `return_sample` is True, this many random samples will be computed. noise : bool If True, noise will be included in the prediction and samples. samp_kwargs : dict The contents of this dictionary will be passed to :py:meth:`draw_sample`. """ def __init__(self, gp, X, n, return_mean, return_std, return_cov, return_sample, return_mean_func, num_samples, noise, samp_kwargs, output_transform): self.gp = gp self.X = X self.n = n self.return_mean = return_mean self.return_std = return_std self.return_cov = return_cov self.return_sample = return_sample self.return_mean_func = return_mean_func self.num_samples = num_samples self.noise = noise self.samp_kwargs = samp_kwargs self.full_output = return_cov or return_std or return_sample or return_mean_func self.output_transform = output_transform def __call__(self, p_case): """Evaluate the desired quantities with free hyperparameters `p_case`. Returns a dict with some or all of the fields 'mean', 'cov', 'std', 'samp' """ try: self.gp.update_hyperparameters(list(p_case)) out = self.gp.predict( self.X, n=self.n, noise=self.noise, full_output=self.full_output, return_samples=self.return_sample, return_mean_func=self.return_mean_func, num_samples=self.num_samples, output_transform=self.output_transform ) if not self.full_output: # If full output is True, return_mean must be the only True thing, # since otherwise this isn't computing anything! return {'mean': out} else: if not self.return_mean: out.pop('mean') if not self.return_std: out.pop('std') if not self.return_cov: out.pop('cov') except Exception as e: out = None if self.gp.verbose: warnings.warn( "Encountered exception during evaluation of MCMC samples. " "Exception is:\n%s\nParams are:\n%s" % ( traceback.format_exc(), str(list(p_case)) ) ) return out class _ComputeLWrapper(object): """Wrapper to allow parallel evaluation of the covariance length scale function. Parameters ---------- gp : :py:class:`GaussianProcess` instance The :py:class:`GaussianProcess` to wrap. X : array-like The evaluation locations to use. No pre-processing is performed: `X` will be passed directly to :py:attr:`l_func`. n : int or array-like The derivative orders to use. No pre-processing is performed: `n` will be passed directly to :py:attr:`l_func`. """ def __init__(self, gp, X, n): self.gp = gp self.X = X self.n = n def __call__(self, p_case): """Evaluate the covariance length scale function with free hyperparameters `p_case`. 
""" try: p_case = scipy.asarray(p_case) p = scipy.copy(scipy.asarray(self.gp.k.params, dtype=float)) p[~self.gp.k.fixed_params] = p_case[:len(self.gp.k.free_params)] out = self.gp.k.l_func(self.X, self.n, *p[1:]) except Exception as e: out = None if self.gp.verbose: warnings.warn( "Encountered exception during evaluation of MCMC samples. " "Exception is:\n%s\nParams are:\n%s" % (traceback.format_exc(), str(list(p_case))) ) return out class _ComputeWWrapper(object): """Wrapper to allow parallel evaluation of the warping function. Parameters ---------- gp : :py:class:`GaussianProcess` instance The :py:class:`GaussianProcess` to wrap. X : array-like The evaluation locations to use. No pre-processing is performed: `X` will be passed directly to :py:attr:`w`. n : int or array-like The derivative orders to use. No pre-processing is performed: `n` will be passed directly to :py:attr:`w`. """ def __init__(self, gp, X, n): self.gp = gp self.X = X self.n = n def __call__(self, p_case): """Evaluate the covariance length scale function with free hyperparameters `p_case`. Note that this only handles warpings nested up to two deep (i.e., if a beta-CDF warp is wrapped in a linear warp). """ try: p_case = scipy.asarray(p_case) p = scipy.copy(scipy.asarray(self.gp.k.params, dtype=float)) p[~self.gp.k.fixed_params] = p_case[:len(self.gp.k.free_params)] self.gp.k.params = p is_nested = hasattr(self.gp.k.k, 'w') out = self.gp.k.w_func(self.X, 0, self.n) except Exception as e: out = None if self.gp.verbose: warnings.warn( "Encountered exception during evaluation of MCMC samples. " "Exception is:\n%s\nParams are:\n%s" % (traceback.format_exc(), str(list(p_case))) ) return out class _ComputeLnProbEval(object): """Helper class to allow emcee to sample in parallel. Parameters ---------- gp : :py:class:`GaussianProcess` instance The :py:class:`GaussianProcess` instance to wrap. """ def __init__(self, gp): self.gp = gp def __call__(self, x): """Return the log-probability of the given hyperparameters. Parameters ---------- x : array-like The new hyperparameters. """ return -1 * self.gp.update_hyperparameters(x.flatten()) class _OptimizeHyperparametersEval(object): """Helper class to support parallel random starts of MAP estimation of hyperparameters. Parameters ---------- gp : :py:class:`GaussianProcess` instance Instance to wrap to allow parallel optimization of. opt_kwargs : dict Dictionary of keyword arguments to be passed to :py:func:`scipy.optimize.minimize`. """ def __init__(self, gp, opt_kwargs): self.gp = gp self.opt_kwargs = opt_kwargs def __call__(self, samp): try: return scipy.optimize.minimize( self.gp.update_hyperparameters, samp, **self.opt_kwargs ) except AttributeError: if self.gp.verbose: warnings.warn( "scipy.optimize.minimize not available, defaulting to fmin_slsqp.", RuntimeWarning ) return wrap_fmin_slsqp( self.gp.update_hyperparameters, samp, opt_kwargs=self.opt_kwargs ) except: if self.gp.verbose: warnings.warn( "Minimizer failed, skipping sample. Error is: %s. " "State of params is: %s" % ( traceback.format_exc(), str(self.gp.free_params[:]), ), RuntimeWarning ) return None
[docs]class Constraint(object): """Implements an inequality constraint on the value of the mean or its derivatives. Provides a callable such as can be passed to SLSQP or COBYLA to implement the constraint when using :py:func:`scipy.optimize.minimize`. The function defaults implement a constraint that forces the mean value to be positive everywhere. Parameters ---------- gp : :py:class:`GaussianProcess` The :py:class:`GaussianProcess` instance to create the constraint on. boundary_val : float, optional Boundary value for the constraint. For `type_` = 'gt', this is the lower bound, for `type_` = 'lt', this is the upper bound. Default is 0.0. n : non-negative int, optional Derivative order to evaluate. Default is 0 (value of the mean). Note that non-int values are silently cast to int. loc : {'min', 'max'}, float or Array-like of float (`num_dim`,), optional Which extreme of the mean to use, or location to evaluate at. * If 'min', the minimum of the mean (optionally over `bounds`) is used. * If 'max', the maximum of the mean (optionally over `bounds`) is used. * If a float (valid for `num_dim` = 1 only) or Array of float, the mean is evaluated at the given X value. Default is 'min' (use function minimum). type_ : {'gt', 'lt'}, optional What type of inequality constraint to implement. * If 'gt', a greater-than-or-equals constraint is used. * If 'lt', a less-than-or-equals constraint is used. Default is 'gt' (greater-than-or-equals). bounds : 2-tuple of float or 2-tuple Array-like of float (`num_dim`,) or None, optional Bounds to use when `loc` is 'min' or 'max'. * If None, the bounds are taken to be the extremes of the training data. For multivariate data, "extremes" essentially means the smallest hypercube oriented parallel to the axes that encapsulates all of the training inputs. (I.e., ``(gp.X.min(axis=0), gp.X.max(axis=0))``) * If `bounds` is a 2-tuple, then this is used as (`lower`, `upper`) where lower` and `upper` are Array-like with dimensions (`num_dim`,). * If `num_dim` is 1 then `lower` and `upper` can be scalar floats. Default is None (use extreme values of training data). Raises ------ TypeError If `gp` is not an instance of :py:class:`GaussianProcess`. ValueError If `n` is negative. ValueError If `loc` is not 'min', 'max' or an Array-like of the correct dimensions. ValueError If `type_` is not 'gt' or 'lt'. ValueError If `bounds` is not None or length 2 or if the elements of bounds don't have the right dimensions. 
""" def __init__(self, gp, boundary_val=0.0, n=0, loc='min', type_='gt', bounds=None): if not isinstance(gp, GaussianProcess): raise TypeError("Argument gp must be an instance of GaussianProcess.") self.gp = gp self.boundary_val = boundary_val self.n = int(n) if self.n < 0: raise ValueError("n must be a non-negative int!") if loc in ('min', 'max'): self.loc = loc else: try: iter(loc) except TypeError: if self.gp.num_dim == 1: self.loc = scipy.asarray([loc], dtype=float) else: raise ValueError("Argument loc must be 'min', 'max' or an " "array of length %d" % self.gp.num_dim) else: loc = scipy.asarray(loc, dtype=float) if loc.shape == (self.gp.num_dim,): self.loc = loc else: raise ValueError("Argument loc must be 'min', 'max' or have " "length %d" % self.gp.num_dim) if type_ in ('gt', 'lt'): self.type_ = type_ else: raise ValueError("Argument type_ must be 'gt' or 'lt'.") if bounds is None: bounds = (scipy.asarray(self.gp.X.min(axis=0), dtype=float).flatten(), scipy.asarray(self.gp.X.max(axis=0), dtype=float).flatten()) else: bounds = list(bounds) if len(bounds) != 2: raise ValueError("Argument bounds must have length 2!") for k in xrange(0, len(bounds)): try: iter(bounds[k]) except TypeError: if self.gp.num_dim == 1: bounds[k] = scipy.asarray([bounds[k]], dtype=float) else: raise ValueError("Each element in argument bounds must " "have length %d" % self.gp.num_dim) else: bounds[k] = scipy.asarray(bounds[k], dtype=float) if bounds[k].shape != (self.gp.num_dim,): raise ValueError("Each element in argument bounds must " "have length %d" % self.gp.num_dim) # Unfold bounds into the shape needed by minimize: self.bounds = zip(bounds[0], bounds[1])
[docs] def __call__(self, params): """Returns a non-negative number if the constraint is satisfied. Parameters ---------- params : Array-like, length dictated by kernel New parameters to use. Returns ------- val : float Value of the constraint. :py:class:`minimize` will attempt to keep this non-negative. """ self.gp.update_hyperparameters(params) if self.loc not in ('min', 'max'): val = self.gp.predict(self.loc, n=self.n, return_cov=False) else: if self.loc == 'max': factor = -1.0 else: factor = 1.0 try: res = scipy.optimize.minimize( lambda X: factor * self.gp.predict(X, n=self.n, return_cov=False)[0, 0], scipy.mean(self.bounds, axis=1), method='SLSQP', bounds=self.bounds ) except AttributeError: res = wrap_fmin_slsqp( lambda X: factor * self.gp.predict(X, n=self.n, return_cov=False)[0, 0], scipy.mean(self.bounds, axis=1), opt_kwargs={'bounds': self.bounds, 'iprint': 0} ) if not res.success: warnings.warn("Solver reports failure, extremum was likely NOT " "found. Status: %d, Message: '%s'" % (res.status, res.message), RuntimeWarning) val = factor * res.fun if self.type_ == 'gt': return val - self.boundary_val else: return self.boundary_val - val
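# Sketch: using Constraint to force the fitted mean to be non-negative during
# constrained MAP estimation with SLSQP, continuing from the earlier sketch.
# How the constraint is wired into the optimizer here is an assumption made
# for illustration; Constraint is assumed to be importable from the package
# top level:
positivity = gptools.Constraint(gp, boundary_val=0.0, n=0, loc='min', type_='gt')
res = scipy.optimize.minimize(
    gp.update_hyperparameters,
    gp.free_params[:],
    method='SLSQP',
    constraints=[{'type': 'ineq', 'fun': positivity}]
)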