"""
Module to gather various high-level algorithms based on the kernel methods,
such as kernel-based predictive models for classification and regression.
"""
from copy import deepcopy

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.exceptions import NotFittedError
from sklearn.svm import SVR
from sklearn.utils.validation import check_X_y, check_array

from kernelmethods import config as cfg
from kernelmethods.base import KernelMatrix
from kernelmethods.ranking import find_optimal_kernel, get_estimator
from kernelmethods.sampling import KernelBucket, make_kernel_bucket
class KernelMachine(BaseEstimator):
    """Generic class to return a drop-in sklearn estimator.

    The estimator selected by ``learner_id`` is trained on the kernel matrix
    obtained by applying ``k_func`` to the training samples, and predicts
    from the kernel matrix between new samples and the training samples.

    Parameters
    ----------
    k_func : KernelFunction
        The kernel function the kernel machine bases itself on

    learner_id : str
        Identifier for the estimator to be built based on the kernel function.
        Options: ``SVM`` and ``SVR``.
        Default: ``SVR``

    Attributes
    ----------
    param_grid
        Second value returned by ``get_estimator(learner_id)``.
        Presumably a hyper-parameter grid for the learner -- confirm against
        ``kernelmethods.ranking.get_estimator``.
    """

    def __init__(self,
                 k_func,
                 learner_id='SVR'):
        """
        Constructor for the KernelMachine class.

        Parameters
        ----------
        k_func : KernelFunction
            The kernel function the kernel machine bases itself on

        learner_id : str
            Identifier for the estimator to be built based on the kernel
            function.
            Options: ``SVM`` and ``SVR``.
            Default: ``SVR``
        """

        self.k_func = k_func
        self.learner_id = learner_id
        self._estimator, self.param_grid = get_estimator(self.learner_id)

    def fit(self, X, y, sample_weight=None):
        """Fit the chosen Estimator based on the user-defined kernel.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features. The kernel matrix is
            computed here from ``k_func``; do not pass a precomputed one.

        y : array-like, shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression). Validated with ``y_numeric=True``.

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights, forwarded unchanged to the underlying
            estimator's ``fit``.

        Returns
        -------
        self : object
        """

        self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)

        # train-vs-train kernel matrix is what the underlying estimator sees
        self._km = KernelMatrix(self.k_func, name='train_km')
        self._km.attach_to(self._train_X)

        self._estimator.fit(X=self._km.full, y=self._train_y,
                            sample_weight=sample_weight)

        return self

    def predict(self, X):
        """
        Make predictions on the new samples in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Samples to predict for, in the same feature space as the
            training data passed to ``fit``.

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Output of the underlying estimator's ``predict`` on the
            test-vs-train kernel matrix.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet. It subclasses both
            ``ValueError`` and ``AttributeError``.
        """

        if not hasattr(self, '_km'):
            raise NotFittedError(
                "Can't predict - not fitted yet! Run .fit() first.")

        X = check_array(X)

        # sample_one must be test data to get the right shape for sklearn X
        self._km.attach_to(sample_one=X, sample_two=self._train_X)
        test_train_KM = self._km.full

        # TODO no dtype conversion is applied to the predictions, as the
        #   right dtype may differ between classifiers and regressors
        return self._estimator.predict(test_train_KM)

    def get_params(self, deep=True):
        """returns all the relevant parameters for this estimator!

        Only the constructor arguments are exposed; parameters of the
        underlying estimator are not included. ``deep`` is accepted for
        sklearn API compatibility and is not used.
        """

        return {'k_func'    : self.k_func,
                'learner_id': self.learner_id}

    def set_params(self, **parameters):
        """Param setter

        Only ``k_func`` and ``learner_id`` are recognised; any other keyword
        is silently ignored. Changing ``learner_id`` rebuilds the underlying
        estimator and ``param_grid`` so they match the new identifier.

        Returns
        -------
        self : object
        """

        previous_learner_id = self.learner_id

        for parameter, value in parameters.items():
            if parameter in ('k_func', 'learner_id'):
                setattr(self, parameter, value)

        # without this, the estimator built in __init__ for the old
        # learner_id would keep being used after the identifier changed
        if self.learner_id != previous_learner_id:
            self._estimator, self.param_grid = get_estimator(self.learner_id)

        return self
class OptimalKernelSVR(SVR, RegressorMixin):
    """
    An estimator to learn the optimal kernel for a given sample and
    build a support vector regressor based on this custom kernel.

    This class is wrapped around the sklearn SVR estimator to function as its
    drop-in replacement, whose implementation is in turn based on LIBSVM.

    Parameters
    ----------
    k_bucket : KernelBucket or str
        An instance of KernelBucket that contains all the kernels to be
        compared, or a string identifying the sampling_strategy which
        populates a KernelBucket.

    method : str
        Forwarded as ``method`` to ``find_optimal_kernel`` to choose how the
        kernels are ranked.
        Default: ``cv_risk``

    Attributes
    ----------
    opt_kernel_
        Result of ``find_optimal_kernel`` on the training data; set by
        ``fit``. Its ``full`` attribute is used as the precomputed kernel.

    support_ : array-like, shape = [n_SV]
        Indices of support vectors.

    support_vectors_ : array-like, shape = [nSV, n_features]
        Support vectors.

    dual_coef_ : array, shape = [1, n_SV]
        Coefficients of the support vector in the decision function.

    coef_ : array, shape = [1, n_features]
        Weights assigned to the features (coefficients in the primal
        problem). This is only available in the case of a linear kernel.

        `coef_` is readonly property derived from `dual_coef_` and
        `support_vectors_`.

    intercept_ : array, shape = [1]
        Constants in decision function.
    """

    def __init__(self, k_bucket='exhaustive', method='cv_risk'):
        """Store the parameters; the SVR always runs on a precomputed kernel."""

        super().__init__(kernel='precomputed')

        self.k_bucket = k_bucket
        self.method = method

    def fit(self, X, y, sample_weight=None):
        """Estimate the optimal kernel, and fit a SVR based on the custom kernel.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features. The kernel matrix is
            computed here; do not pass a precomputed one.

        y : array-like, shape (n_samples,)
            Real-valued regression targets. Validated with
            ``y_numeric=True``.

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights, forwarded unchanged to ``SVR.fit``.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``k_bucket`` is neither a KernelBucket nor a string, or if
            ``make_kernel_bucket`` fails for the given string.
        """

        if isinstance(self.k_bucket, str):
            try:
                # using a new internal variable to retain user supplied param
                self._k_bucket = make_kernel_bucket(self.k_bucket)
            except Exception as exc:
                # not a bare except: KeyboardInterrupt/SystemExit must pass
                # through, and the original cause stays on the traceback
                raise ValueError('Input for k_bucket can only be an instance '
                                 'of KernelBucket or a sampling strategy to '
                                 'generate one with make_kernel_bucket. '
                                 'Sampling strategy must be one of {}'
                                 ''.format(cfg.kernel_bucket_strategies)
                                 ) from exc
        elif isinstance(self.k_bucket, KernelBucket):
            # work on a copy so the user-supplied bucket is left untouched
            self._k_bucket = deepcopy(self.k_bucket)
        else:
            raise ValueError('Input for k_bucket can only be an instance of '
                             'KernelBucket or a sampling strategy to generate '
                             'one with make_kernel_bucket')

        self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)

        self.opt_kernel_ = find_optimal_kernel(self._k_bucket,
                                               self._train_X, self._train_y,
                                               method=self.method,
                                               estimator_name='SVR')

        super().fit(X=self.opt_kernel_.full, y=self._train_y,
                    sample_weight=sample_weight)

        # temporary hack to pass sklearn estimator checks till a bug is fixed
        # for more see: https://github.com/scikit-learn/scikit-learn/issues/14712
        self.n_iter_ = 1

        return self

    def predict(self, X):
        """
        Perform regression on samples in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Samples to predict for, in the same feature space as the
            training data passed to ``fit``.

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Output of ``SVR.predict`` on the test-vs-train kernel matrix.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet. It subclasses ``ValueError``.
        """

        if not hasattr(self, 'opt_kernel_'):
            raise NotFittedError(
                "Can't predict - not fitted yet! Run .fit() first.")

        X = check_array(X)

        # sample_one must be test data to get the right shape for sklearn X
        self.opt_kernel_.attach_to(sample_one=X, sample_two=self._train_X)
        test_train_KM = self.opt_kernel_.full

        # TODO no dtype conversion is applied, as this is not classification
        return super().predict(test_train_KM)

    def get_params(self, deep=True):
        """returns all the relevant parameters for this estimator!

        Only ``k_bucket`` and ``method`` are exposed; the inherited SVR
        parameters are not included. ``deep`` is accepted for sklearn API
        compatibility and is not used.
        """

        return {'k_bucket': self.k_bucket,
                'method'  : self.method}

    def set_params(self, **parameters):
        """Param setter

        Only ``k_bucket`` and ``method`` are recognised; any other keyword is
        silently ignored.

        Returns
        -------
        self : object
        """

        for parameter, value in parameters.items():
            if parameter in ('k_bucket', 'method'):
                setattr(self, parameter, value)

        return self