Source code for kernelmethods.ranking

"""

Module gathering techniques and helpers to rank kernels using various methods and
metrics, such as

 - their target alignment,
 - performance in cross-validation

"""

import numpy as np
from kernelmethods import config as cfg
from kernelmethods.sampling import KernelBucket
from kernelmethods.utils import min_max_scale


[docs]def find_optimal_kernel(kernel_bucket, sample, targets, method='align/corr', **method_params): """ Finds the optimal kernel for the current sample given their labels. Parameters ---------- kernel_bucket : KernelBucket The collection of kernels to evaluate and rank sample : ndarray The dataset given kernel bucket to be evaluated on targets : ndarray Target labels for each point in the sample dataset method : str identifier for the metric to choose to rank the kernels Returns ------- km : KernelMatrix Instance of KernelMatrix with the optimal kernel function """ if not isinstance(kernel_bucket, KernelBucket): raise TypeError('Input is not of required type: KernelBucket') method = method.lower() if method not in cfg.VALID_RANKING_METHODS: raise NotImplementedError('Ranking method not recognized. Choose one of {}' ''.format(cfg.VALID_RANKING_METHODS)) kernel_bucket.attach_to(sample=sample) metric = rank_kernels(kernel_bucket, targets, method=method, **method_params) return kernel_bucket[np.argmax(metric)]
[docs]def rank_kernels(kernel_bucket, targets, method='align/corr', **method_params): """ Computes a given ranking metric for all the kernel matrices in the bucket. Choices for the method include: "align/corr", "cv_risk" Parameters ---------- kernel_bucket : KernelBucket targets : Iterable target values of the sample attached to the bucket method : str Identifies one of the metrics: ``align/corr``, ``cv_risk`` method_params : dict Additional parameters to be passed on to the method chosen above. Returns ------- scores : ndarray Values of the ranking metrics computed for the kernel matrices in the bucket """ method = method.lower() if method not in cfg.VALID_RANKING_METHODS: raise NotImplementedError('Ranking method not recognized. Choose one of {}' ''.format(cfg.VALID_RANKING_METHODS)) if method in ("align/corr",): return alignment_ranking(kernel_bucket, targets, **method_params) elif method in ('cv_risk', 'cv'): return CV_ranking(kernel_bucket, targets, **method_params)
[docs]def CV_ranking(kernel_bucket, targets, num_folds=3, estimator_name='SVM'): """ Ranks kernels by their performance measured via cross-validation (CV). Parameters ---------- kernel_bucket : KernelBucket targets : Iterable target values of the sample attached to the bucket num_folds : int Number of folds for the CV to be employed estimator_name : str Name of a valid Scikit-Learn estimator. Default: ``SVM`` Returns ------- scores : ndarray CV performance computed for the kernel matrices in the bucket """ from sklearn.model_selection import GridSearchCV cv_scores = list() for km in kernel_bucket: estimator, param_grid = get_estimator(estimator_name) gs = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=num_folds) gs.fit(km.full, targets) cv_scores.append(gs.best_score_) # scaling helps compare across multiple metrics return 100 * min_max_scale(cv_scores)
[docs]def alignment_ranking(kernel_bucket, targets, **method_params): """Method to rank kernels that depend on target alignment. .. note: To be implemented. """ raise NotImplementedError()
[docs]def get_estimator(learner_id='svm'): """ Returns a valid kernel machine to become the base learner of the MKL methods. Base learner must be able to accept a precomputed kernel for fit/predict methods! Parameters ---------- learner_id : str Identifier for the estimator to be chosen. Options: ``SVM`` and ``SVR``. Default: ``SVM`` Returns ------- base_learner : Estimator An sklearn estimator param_grid : dict Parameter grid (sklearn format) for the chosen estimator. """ # TODO hyper-param optimization needs to be incorporated somewhere!! # Perhaps by returning a GridSearchCV(base_learner) object or similar? learner_id = learner_id.lower() if learner_id in ('svm', 'svc'): from sklearn.svm import SVC range_C = np.power(10.0, range(-6, 6)) param_grid = dict(C=range_C) base_learner = SVC(kernel='precomputed', probability=True, C=10) elif learner_id in ('svr',): from sklearn.svm import SVR range_C = np.power(10.0, range(-6, 6)) param_grid = dict(C=range_C) base_learner = SVR(kernel='precomputed', C=10) else: raise NotImplementedError('Requested base learner {} is not implemented yet!' ''.format(learner_id)) return base_learner, param_grid