Source code for compare


import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import friedmanchisquare, rankdata, norm
from scipy.special import gammaln

def pairwise(accuracy_balanced, method_names, out_results_dir, num_repetitions):
    """
    Produce a matrix of pair-wise significance tests.

    Each cell [i, j] answers the question: is method i significantly better
    than method j?  The result would be based on a test of choice; the default
    test would be the non-parametric Friedman test.

    Parameters
    ----------
    accuracy_balanced : array-like
        Balanced accuracies.  The median is taken along axis 0, so columns are
        presumably methods and rows CV repetitions -- TODO confirm with caller.
    method_names : sequence
        Names of the methods being compared; its length fixes the size of the
        returned matrix.
    out_results_dir
        Not used by this function at present; kept for interface compatibility.
    num_repetitions
        Not used by this function at present; kept for interface compatibility.

    Returns
    -------
    signif_matrix : numpy.ndarray
        Float array of shape (num_methods, num_methods).  Only the strict upper
        triangle (j > i) is filled, with the outcome of ``check_if_better`` for
        methods i and j; every other cell is NaN.
    """
    num_methods = len(method_names)

    # Each CV repetition is treated as a performance measurement on a new
    # dataset (although the repetitions are not independent of each other).
    median_bal_acc = np.nanmedian(accuracy_balanced, axis=0)

    # np.rank reported the number of array dimensions (and no longer exists);
    # rankdata yields actual ranks, with rank 1 for the smallest median, so a
    # higher median accuracy receives a higher rank.
    ranks = rankdata(median_bal_acc)
    critical_dist = compute_critical_dist(ranks)

    signif_matrix = np.full([num_methods, num_methods], np.nan)
    for m1 in range(num_methods):
        # Stop at num_methods (exclusive): the last valid index is
        # num_methods - 1, so going one further would index out of bounds.
        for m2 in range(m1 + 1, num_methods):
            signif_matrix[m1, m2] = check_if_better(ranks[m1], ranks[m2], critical_dist)

    return signif_matrix
def compute_critical_dist(ranks):
    """
    Placeholder for the critical-distance computation.

    Not implemented yet: the ``ranks`` argument is ignored and ``None`` is
    returned.
    """
    return None
def check_if_better(rank_one, rank_two, critical_dist):
    """
    Check whether rank_one exceeds rank_two by at least critical_dist.

    Returns the result of ``rank_one - rank_two >= critical_dist``; the
    comparison is directional, so swapping the two ranks can change the answer.
    """
    return rank_one - rank_two >= critical_dist
def vertical_nemenyi_plot(data, num_reps, alpha = 0.05, cmap = plt.cm.Greens):
    """
    Vertical Nemenyi plot to compare model ranks and show differences.

    Not implemented yet: all arguments are ignored, nothing is drawn and
    ``None`` is returned.
    """
    return None