# Source code for kernelmethods.algorithms

"""

Module to gather various high-level algorithms based on the kernel methods,
    such as kernel-based predictive models for classification and regression.

"""

from copy import deepcopy

from kernelmethods import config as cfg
from kernelmethods.base import KernelMatrix
from kernelmethods.ranking import find_optimal_kernel, get_estimator
from kernelmethods.sampling import KernelBucket, make_kernel_bucket
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.svm import SVR
from sklearn.utils.validation import check_X_y, check_array


class KernelMachine(BaseEstimator):
    """Generic class to return a drop-in sklearn estimator.

    The underlying estimator (and its parameter grid) is obtained from
    ``get_estimator(learner_id)``. It is trained on the kernel matrix that
    ``k_func`` produces on the training samples, and predicts from the kernel
    matrix computed between the new samples and the training samples.

    Parameters
    ----------
    k_func : KernelFunction
        The kernel function the kernel machine bases itself on

    learner_id : str
        Identifier for the estimator to be built based on the kernel function.
        Options: ``SVM`` and ``SVR``.
        Default: ``SVR``

    """

    def __init__(self,
                 k_func,
                 learner_id='SVR'):
        """
        Constructor for the KernelMachine class.

        Parameters
        ----------
        k_func : KernelFunction
            The kernel function the kernel machine bases itself on

        learner_id : str
            Identifier for the estimator to be built based on the kernel function.
            Options: ``SVM`` and ``SVR``.
            Default: ``SVR``
        """

        self.k_func = k_func
        self.learner_id = learner_id
        self._estimator, self.param_grid = get_estimator(self.learner_id)

    def fit(self, X, y, sample_weight=None):
        """Fit the chosen Estimator based on the user-defined kernel.

        The kernel matrix is computed internally from ``X`` with ``k_func``;
        the underlying estimator is then fit on that matrix.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression). Validated with ``y_numeric=True``.

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights, forwarded unchanged to the ``fit`` method of
            the underlying estimator.

        Returns
        -------
        self : object

        """

        self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)

        # a fresh kernel matrix per fit, attached to the training sample only
        self._km = KernelMatrix(self.k_func, name='train_km')
        self._km.attach_to(self._train_X)

        self._estimator.fit(X=self._km.full, y=self._train_y,
                            sample_weight=sample_weight)

        return self

    def predict(self, X):
        """
        Make predictions on the new samples in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New samples, in the same feature space as the training data.
            The (test x train) kernel matrix is computed internally.

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Output of the underlying estimator's ``predict`` for samples in X,
            returned as is (no dtype conversion is applied).

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``. This exception derives from both
            ``ValueError`` and ``AttributeError``.
        """

        if not hasattr(self, '_km'):
            # local import: keeps this class self-contained w.r.t. file imports
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "Can't predict - not fitted yet! Run .fit() first.")

        X = check_array(X)

        # sample_one must be test data to get the right shape for sklearn X
        self._km.attach_to(sample_one=X, sample_two=self._train_X)
        test_train_KM = self._km.full

        return self._estimator.predict(test_train_KM)

    def get_params(self, deep=True):
        """Return the constructor parameters of this estimator.

        Only ``k_func`` and ``learner_id`` are exposed; parameters of the
        underlying estimator are not included. ``deep`` is accepted for
        compatibility with the sklearn API and is otherwise unused.
        """

        return {'k_func'    : self.k_func,
                'learner_id': self.learner_id}

    def set_params(self, **parameters):
        """Set ``k_func`` and/or ``learner_id``; other names are ignored.

        When ``learner_id`` changes, the underlying estimator and its
        parameter grid are rebuilt via ``get_estimator`` so that they stay
        consistent with the new identifier (this discards any previous fit
        of the underlying estimator).

        Returns
        -------
        self : object
        """

        _unset = object()
        previous_learner_id = getattr(self, 'learner_id', _unset)

        for parameter, value in parameters.items():
            if parameter in ('k_func', 'learner_id'):
                setattr(self, parameter, value)

        current_learner_id = getattr(self, 'learner_id', _unset)
        if (current_learner_id is not _unset
                and current_learner_id != previous_learner_id):
            # keep the estimator in sync with the identifier it was built from
            self._estimator, self.param_grid = get_estimator(self.learner_id)

        return self


class OptimalKernelSVR(SVR, RegressorMixin):
    """
    An estimator to learn the optimal kernel for a given sample and
    build a support vector regressor based on this custom kernel.

    This class is wrapped around the sklearn SVR estimator to function as its
    drop-in replacement, whose implementation is in turn based on LIBSVM.
    The parent SVR is always constructed with ``kernel='precomputed'``.

    Parameters
    ----------

    k_bucket : KernelBucket or str
        An instance of KernelBucket that contains all the kernels to be compared,
        or a string identifying the sampling_strategy which populates a KernelBucket.
        Default: ``exhaustive``

    method : str
        Forwarded as ``method`` to ``find_optimal_kernel`` when selecting
        the kernel during ``fit``.
        Default: ``cv_risk``


    Attributes
    ----------
    opt_kernel_ : object
        The kernel selected by ``find_optimal_kernel`` during ``fit``.

    support_ : array-like, shape = [n_SV]
        Indices of support vectors.

    support_vectors_ : array-like, shape = [nSV, n_features]
        Support vectors.

    dual_coef_ : array, shape = [1, n_SV]
        Coefficients of the support vector in the decision function.

    coef_ : array, shape = [1, n_features]
        Weights assigned to the features (coefficients in the primal
        problem). This is only available in the case of a linear kernel.

        `coef_` is readonly property derived from `dual_coef_` and
        `support_vectors_`.

    intercept_ : array, shape = [1]
        Constants in decision function.

    """

    def __init__(self, k_bucket='exhaustive', method='cv_risk'):

        super().__init__(kernel='precomputed')

        self.k_bucket = k_bucket
        self.method = method

    def fit(self, X, y, sample_weight=None):
        """Estimate the optimal kernel, and fit a SVR based on the custom kernel.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features. The kernel matrix
            handed to the parent SVR is computed internally.

        y : array-like, shape (n_samples,)
            Target values (real numbers). Validated with ``y_numeric=True``.

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights, forwarded unchanged to the parent ``fit``.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``k_bucket`` is neither a KernelBucket instance nor a string
            that ``make_kernel_bucket`` accepts.

        """

        if isinstance(self.k_bucket, str):
            try:
                # using a new internal variable to retain user supplied param
                self._k_bucket = make_kernel_bucket(self.k_bucket)
            except Exception as exc:
                # `Exception` (not a bare except) so that KeyboardInterrupt /
                # SystemExit propagate; chain to keep the original cause
                raise ValueError('Input for k_bucket can only be an instance of '
                                 'KernelBucket or a sampling strategy to generate '
                                 'one with make_kernel_bucket. '
                                 'Sampling strategy must be one of {}'
                                 ''.format(cfg.kernel_bucket_strategies)) from exc
        elif isinstance(self.k_bucket, KernelBucket):
            # work on a copy so the user-supplied bucket is left untouched
            self._k_bucket = deepcopy(self.k_bucket)
        else:
            raise ValueError('Input for k_bucket can only be an instance of '
                             'KernelBucket or a sampling strategy to generate '
                             'one with make_kernel_bucket')

        self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)

        self.opt_kernel_ = find_optimal_kernel(self._k_bucket,
                                               self._train_X, self._train_y,
                                               method=self.method,
                                               estimator_name='SVR')

        super().fit(X=self.opt_kernel_.full, y=self._train_y,
                    sample_weight=sample_weight)

        # temporary hack to pass sklearn estimator checks till a bug is fixed
        # for more see: https://github.com/scikit-learn/scikit-learn/issues/14712
        self.n_iter_ = 1

        return self

    def predict(self, X):
        """
        Perform regression on samples in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New samples, in the same feature space as the training data.
            The (test x train) kernel matrix is computed internally with the
            kernel selected during ``fit``.

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Predicted values for samples in X, as returned by the parent
            ``predict`` (no dtype conversion is applied).

        Raises
        ------
        ValueError
            If called before ``fit``.
        """

        if not hasattr(self, 'opt_kernel_'):
            raise ValueError("Can't predict - not fitted yet! Run .fit() first.")

        X = check_array(X)

        # sample_one must be test data to get the right shape for sklearn X
        self.opt_kernel_.attach_to(sample_one=X, sample_two=self._train_X)
        test_train_KM = self.opt_kernel_.full

        return super().predict(test_train_KM)

    def get_params(self, deep=True):
        """Return the constructor parameters of this estimator.

        Only ``k_bucket`` and ``method`` are exposed; parameters of the parent
        SVR are not included. ``deep`` is accepted for compatibility with the
        sklearn API and is otherwise unused.
        """

        return {'k_bucket': self.k_bucket,
                'method'  : self.method}

    def set_params(self, **parameters):
        """Set ``k_bucket`` and/or ``method``; other names are ignored.

        Returns
        -------
        self : object
        """

        for parameter, value in parameters.items():
            if parameter in ('k_bucket', 'method'):
                setattr(self, parameter, value)

        return self
# kernelmethods 0.2 documentation
#
# Source code for kernelmethods.algorithms