# Source code for kernelmethods.sampling

from functools import partial
from warnings import warn

import numpy as np
from kernelmethods import config as cfg
from kernelmethods.base import BaseKernelFunction, KernelMatrix, KernelSet
from kernelmethods.config import KernelMethodsException, KernelMethodsWarning
from kernelmethods.numeric_kernels import (GaussianKernel, LaplacianKernel,
                                           LinearKernel, PolyKernel)
from kernelmethods.operations import alignment_centered
from kernelmethods.utils import is_iterable_but_not_str
from scipy.stats.stats import pearsonr


class KernelBucket(KernelSet):
    """
    Class to generate and/or maintain a "bucket" of candidate kernels.

    Applications:

        1. to rank/filter/select kernels based on a given sample via many metrics
        2. to be defined.

    **Note**:

    1. Linear kernel is always added during init without your choosing.
    2. This is in contrast to Chi^2 kernel, which is not added to the bucket by
       default, as it requires positive feature values and may break default use
       for common applications. You can easily add Chi^2 or any other kernels via
       the ``add_parametrized_kernels`` method.


    Parameters
    ----------
    poly_degree_values : Iterable or None
        List of values for the degree parameter of the PolyKernel. One
        KernelMatrix will be added to the bucket for each value.
        Passing ``None`` adds no PolyKernel to the bucket.

    rbf_sigma_values : Iterable or None
        List of values for the sigma parameter of the GaussianKernel. One
        KernelMatrix will be added to the bucket for each value.
        Passing ``None`` adds no GaussianKernel to the bucket.

    laplace_gamma_values : Iterable or None
        List of values for the gamma parameter of the LaplacianKernel. One
        KernelMatrix will be added to the bucket for each value.
        Passing ``None`` adds no LaplacianKernel to the bucket.

    name : str
        String to identify the purpose or type of the bucket of kernels.
        Also helps easily distinguishing it from other buckets.

    normalize_kernels : bool
        Flag to indicate whether the kernel matrices need to be normalized

    skip_input_checks : bool
        Flag to indicate whether checks on input data (type, format etc) can
        be skipped. This helps save a tiny bit of runtime for expert uses when
        data types and formats are managed thoroughly in numpy. Default:
        False. Disable this only when you know exactly what you're doing!

    """


    def __init__(self,
                 poly_degree_values=cfg.default_degree_values_poly_kernel,
                 rbf_sigma_values=cfg.default_sigma_values_gaussian_kernel,
                 laplace_gamma_values=cfg.default_gamma_values_laplacian_kernel,
                 name='KernelBucket',
                 normalize_kernels=True,
                 skip_input_checks=False,
                 ):
        """
        Constructor.

        Seeds the bucket with a linear kernel, then adds one kernel matrix per
        value for each of the polynomial, Gaussian and Laplacian kernels.
        Parameters are described in the class docstring.

        Raises
        ------
        TypeError
            If ``normalize_kernels`` or ``skip_input_checks`` is not a bool.

        ValueError
            If any of the ``*_values`` arguments is neither ``None`` nor an
            acceptable iterable (see ``add_parametrized_kernels``).

        """

        if not isinstance(normalize_kernels, bool):
            raise TypeError('normalize_kernels must be bool')
        self._norm_kernels = normalize_kernels

        if not isinstance(skip_input_checks, bool):
            raise TypeError('skip_input_checks must be bool')
        self._skip_input_checks = skip_input_checks

        # start with the addition of kernel matrix for linear kernel
        init_kset = [KernelMatrix(LinearKernel(), normalized=self._norm_kernels), ]
        super().__init__(km_list=init_kset, name=name)
        # not attached to a sample yet
        self._num_samples = None

        self.add_parametrized_kernels(PolyKernel, 'degree', poly_degree_values)
        self.add_parametrized_kernels(GaussianKernel, 'sigma', rbf_sigma_values)
        self.add_parametrized_kernels(LaplacianKernel, 'gamma', laplace_gamma_values)


    def add_parametrized_kernels(self, kernel_func, param, values):
        """
        Adds a list of kernels parametrized by various values for a given param

        A value for which the kernel cannot be constructed or appended is
        skipped with a ``KernelMethodsWarning``; the remaining values are still
        processed.

        Parameters
        ----------
        kernel_func : BaseKernelFunction
            Kernel function to be added (not an instance, but callable class)

        param : str
            Name of the parameter to the above kernel function

        values : Iterable or None
            List of parameter values. One kernel will be added for each value.
            If ``None``, nothing is added and the method returns silently.

        Raises
        ------
        KernelMethodsException
            If ``kernel_func`` is not a class derived from BaseKernelFunction.

        ValueError
            If ``values`` is rejected by
            ``is_iterable_but_not_str(values, min_length=1)``.

        """

        if (not isinstance(kernel_func, type)) or \
            (not issubclass(kernel_func, BaseKernelFunction)):
            raise KernelMethodsException('Input {} is not a valid kernel func!'
                                         ' Must be derived from BaseKernelFunction'
                                         ''.format(kernel_func))

        if values is None:
            # None means "do not add this kernel family": a deliberate no-op
            return

        if not is_iterable_but_not_str(values, min_length=1):
            raise ValueError('values must be an iterable set of param values (n>=1)')

        for val in values:
            try:
                param_dict = {param              : val,
                              'skip_input_checks': self._skip_input_checks}
                self.append(KernelMatrix(kernel_func(**param_dict),
                                         normalized=self._norm_kernels))
            except Exception:
                # best-effort: one bad value must not prevent adding the others.
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit still propagate.
                warn('Unable to add {} to the bucket for {}={}. Skipping it.'
                     ''.format(kernel_func, param, val), KernelMethodsWarning)


def make_kernel_bucket(strategy='exhaustive',
                       normalize_kernels=True,
                       skip_input_checks=False):
    """
    Generates a bucket of candidate kernels based on user preferences.

    Parameters
    ----------
    strategy : str or KernelSet
        Name of the strategy for populating the kernel bucket (case-insensitive).
        Options: 'exhaustive' and 'light'. Default: 'exhaustive'.
        If an existing KernelBucket/KernelSet is passed instead, it is returned
        unchanged after issuing a warning.

    normalize_kernels : bool
        Flag to indicate whether to normalize the kernel matrices

    skip_input_checks : bool
        Flag to indicate whether checks on input data (type, format etc) can
        be skipped. This helps save a tiny bit of runtime for expert uses when
        data types and formats are managed thoroughly in numpy. Default:
        False. Disable this only when you know exactly what you're doing!

    Returns
    -------
    kb : KernelBucket
        Kernel bucket populated according to the requested strategy

    Raises
    ------
    ValueError
        If ``strategy`` is a string other than the supported options.

    """

    if isinstance(strategy, (KernelBucket, KernelSet)):
        # nothing to build: hand the caller's own container back
        warn('Input is already a kernel bucket/set - simply returning it!')
        return strategy

    strategy = strategy.lower()
    if strategy == 'exhaustive':
        bucket_name = 'KBucketExhaustive'
        degree_values = cfg.default_degree_values_poly_kernel
        sigma_values = cfg.default_sigma_values_gaussian_kernel
        gamma_values = cfg.default_gamma_values_laplacian_kernel
    elif strategy == 'light':
        bucket_name = 'KBucketLight'
        degree_values = cfg.light_degree_values_poly_kernel
        sigma_values = cfg.light_sigma_values_gaussian_kernel
        gamma_values = cfg.light_gamma_values_laplacian_kernel
    else:
        raise ValueError('Invalid choice of strategy '
                         '- must be one of {}'.format(cfg.kernel_bucket_strategies))

    return KernelBucket(name=bucket_name,
                        normalize_kernels=normalize_kernels,
                        skip_input_checks=skip_input_checks,
                        poly_degree_values=degree_values,
                        rbf_sigma_values=sigma_values,
                        laplace_gamma_values=gamma_values)


def ideal_kernel(targets):
    r"""
    Computes the ideal kernel matrix from the given target labels.

    The targets are flattened into a single column vector ``y`` and the outer
    product of that vector with itself is returned.

    Parameters
    ----------
    targets : Iterable
        Target values (``y``) to compute the ideal kernel from.

    Returns
    -------
    ideal_kernel : ndarray
        The ideal kernel yy\ :sup:`T`, of shape (n, n) where n is the total
        number of target values.

    """

    targets = np.array(targets).reshape((-1, 1))  # column vector of shape (n, 1)

    # (n, 1) times (1, n) gives the (n, n) outer product
    return targets.dot(targets.T)


def correlation_km(k1, k2):
    """
    Computes [pearson] correlation coefficient between two kernel matrices

    Both inputs are flattened to 1-D before the coefficient is computed, so
    entries are paired by their position in the flattened arrays.

    Parameters
    ----------
    k1, k2 : ndarray or array-like
        Two kernel matrices of the same size

    Returns
    -------
    corr_coef : float
        Correlation coefficient between the vectorized kernel matrices

    """

    # np.ravel accepts nested lists as well as ndarrays;
    # the p-value returned by pearsonr is not needed here
    corr_coef, _ = pearsonr(np.ravel(k1), np.ravel(k2))

    return corr_coef


def pairwise_similarity(k_bucket, metric='corr'):
    """
    Computes the similarity between all pairs of kernel matrices in a given bucket.

    Parameters
    ----------
    k_bucket : KernelBucket
        Container of length num_km, with each an instance ``KernelMatrix``

    metric : str
        Identifies the metric to be used. Options: ``corr`` (correlation
        coefficient) and ``align`` (centered alignment).

    Returns
    -------
    pairwise_metric : ndarray of shape (num_km, num_km)
        A symmetric matrix computing the pairwise similarity between the various
        kernel matrices

    Raises
    ------
    KeyError
        If ``metric`` is not one of the supported options.

    """

    # mutual info?
    metric_func = {'corr' : correlation_km,
                   'align': partial(alignment_centered, value_if_zero_division=0.0)}
    try:
        estimator = metric_func[metric]
    except KeyError:
        # same exception type as a plain dict lookup, but says what is allowed
        raise KeyError('Invalid metric {!r} - must be one of {}'
                       ''.format(metric, sorted(metric_func))) from None

    num_kernels = k_bucket.size
    pairwise_metric = np.full((num_kernels, num_kernels), fill_value=np.nan)
    for idx_one in range(num_kernels):
        # invariant for the whole inner loop, so fetch it once
        km_one = k_bucket[idx_one].full
        # the metric is symmetric, so only the upper triangle is computed;
        # computing i,i as well to be consistent
        for idx_two in range(idx_one, num_kernels):
            pairwise_metric[idx_one, idx_two] = estimator(km_one,
                                                          k_bucket[idx_two].full)

    # making it symmetric: mirror the upper triangle into the lower one
    # (tril_indices includes the diagonal, which is rewritten with itself)
    idx_lower_tri = np.tril_indices(num_kernels)
    pairwise_metric[idx_lower_tri] = pairwise_metric.T[idx_lower_tri]

    return pairwise_metric
# kernelmethods 0.2 documentation
#
# Source code for kernelmethods.sampling