"""Source code for multi_edge"""

__all__ = ['extract_multiedge', 'summarize_multigraph']

import sys
import traceback
import warnings
from functools import partial
from multiprocessing import Manager, Pool
from os.path import getsize, isfile
from sys import version_info

import hiwenet
import networkx as nx
import numpy as np

if version_info.major > 2:
    from graynet.utils import (stamp_expt_multiedge, check_params_multiedge,
                               make_output_path_graph,
                               save_graph, check_subjects, check_stat_methods,
                               check_num_bins,
                               check_weights,
                               check_num_procs, check_atlas, check_edge_range_dict,
                               mask_background_roi,
                               warn_nan,
                               stamp_expt_weight, import_features,
                               save_per_subject_graph)
    from graynet import parcellate
    from graynet import config_graynet as cfg
else:
    raise NotImplementedError(
            'graynet supports only Python 3 or higher. '
            'Upgrading to Python 3+ is highly recommended.')


def extract_multiedge(subject_id_list,
                      input_dir,
                      base_feature_list=cfg.default_features_multi_edge,
                      weight_method_list=cfg.default_weight_method,
                      summary_stats=cfg.multi_edge_summary_func_default,
                      num_bins=cfg.default_num_bins,
                      edge_range_dict=cfg.edge_range_predefined,
                      atlas=cfg.default_atlas,
                      smoothing_param=cfg.default_smoothing_param,
                      node_size=cfg.default_node_size,
                      out_dir=None,
                      return_results=False,
                      overwrite_results=False,
                      num_procs=cfg.default_num_procs):
    """
    Extracts weighted networks (matrix of pair-wise ROI distances) based on
    multiple gray matter features from Freesurfer processing.

    Parameters
    ----------
    subject_id_list : str or list
        must be path to a file containing subject IDs, or a list of subject IDs

    input_dir : str
        Path to the input directory where features can be read.
        This can be Freesurfer's SUBJECTS_DIR, where the outputs of processing
        are stored, or another directory with a structure that graynet can parse.

    base_feature_list : list
        Set of features that drive the different edges between a pair of ROIs.
        For example, if you choose thickness and pial_curv, each pair of ROIs
        will have two edges. This multi-edge network can be turned into a single
        network by averaging the weights of the individual networks, or by
        computing another summary statistic of your interest.

    weight_method_list : string(s), optional
        Type of distance (or metric) to compute between the pair of histograms.
        It must be one of the following methods:

        - 'chebyshev'
        - 'chebyshev_neg'
        - 'chi_square'
        - 'correlate'
        - 'correlate_1'
        - 'cosine'
        - 'cosine_1'
        - 'cosine_2'
        - 'cosine_alt'
        - 'euclidean'
        - 'fidelity_based'
        - 'histogram_intersection'
        - 'histogram_intersection_1'
        - 'jensen_shannon'
        - 'kullback_leibler'
        - 'manhattan'
        - 'minowski'
        - 'noelle_1'
        - 'noelle_2'
        - 'noelle_3'
        - 'noelle_4'
        - 'noelle_5'
        - 'relative_bin_deviation'
        - 'relative_deviation'

        Note that only the following are *metrics*:

        - 'manhattan'
        - 'minowski'
        - 'euclidean'
        - 'noelle_2'
        - 'noelle_4'
        - 'noelle_5'

        The following are *semi- or quasi-metrics*:

        - 'kullback_leibler'
        - 'jensen_shannon'
        - 'chi_square'
        - 'chebyshev'
        - 'cosine_1'
        - 'chebyshev_neg'
        - 'correlate_1'
        - 'histogram_intersection_1'
        - 'relative_deviation'
        - 'relative_bin_deviation'
        - 'noelle_1'
        - 'noelle_3'

        The following are classified as similarity functions:

        - 'histogram_intersection'
        - 'correlate'
        - 'cosine'
        - 'cosine_2'
        - 'cosine_alt'
        - 'fidelity_based'

        *Default* choice: 'manhattan'.

    summary_stats : list of str
        A string, or list of strings, each representing a method (like 'median',
        'prod' or 'max') to compute a summary statistic from the array of
        multiple weights computed. Each method must be available as a member of
        numpy or scipy.stats.

    num_bins : int
        Number of histogram bins to use when computing pair-wise weights.
        Default : 25

    edge_range_dict : tuple or list
        The range of edges (two finite values) within which to build the
        histogram e.g. ``--edge_range 0 5``.
        This can be helpful (and important) to ensure correspondence across
        multiple invocations of graynet (e.g. for different subjects), in terms
        of the range across all bins as well as individual bin edges.

        Default :

        - ( 0.0, 5.0) for ``freesurfer_thickness`` and
        - (-0.3, 0.3) for ``freesurfer_curv``.

    atlas : str
        Name of the atlas whose parcellation is to be used.
        Choices for parcellation: ['fsaverage', 'glasser2016'], which are
        primarily cortical. Volumetric whole-brain atlases will be added soon.
    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical
        features, or another parameter relevant to the chosen base_feature_list.
        Default: assumed as fwhm=10mm for the default feature choice 'thickness'.

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches,
        depending on the type of atlas or feature. This is not implemented yet;
        it is just a placeholder to enable default computation.

    out_dir : Path or str, optional
        Path to the output directory to store results.
        Default: None, in which case results are returned, but not saved to disk.
        If this is None, return_results must be True.

    return_results : bool
        Flag to indicate whether to return the results.
        Keeping this False helps to reduce memory requirements when the number
        of nodes in a parcellation, or the number of subjects or weight methods,
        is large, as results are not retained for all combinations when running
        from the command line interface (or on an HPC).
        Default: False.
        If this is False, out_dir must be specified to save the results to disk.

    overwrite_results : bool
        Flag to request overwriting of existing results, in case of
        reruns/failed jobs. By default, if the expected output file exists and
        is of non-zero size, its computation is skipped (assuming the file is
        complete, usable and not corrupted).

    num_procs : int
        Number of parallel processes to use to speed up computation.

    Returns
    -------
    edge_weights_all : dict, None
        If return_results is True, this will be a dictionary keyed by a tuple
        of (weight_method, base_feature, subject_ID), matching the keys stored
        during computation. Each value is a numpy array of length
        p = k*(k-1)/2, with k = number of nodes in the atlas parcellation.
        If return_results is False, this will be None, which is the default.

    """

    # volumetric version is not fully tested yet!
    for feat in base_feature_list:
        if feat in cfg.features_volumetric:
            raise NotImplementedError('MultiEdge networks are not yet supported '
                                      'for volumetric features! '
                                      'They are under development. Stay tuned.')
    # all the checks must happen here, as this is the key function in the API
    check_params_multiedge(base_feature_list, input_dir, atlas, smoothing_param,
                           node_size, out_dir, return_results)
    atlas, atlas_name = check_atlas(atlas)

    subject_id_list, num_subjects, max_id_width, nd_id = check_subjects(
            subject_id_list)

    num_bins = check_num_bins(num_bins)
    edge_range_dict = check_edge_range_dict(edge_range_dict, base_feature_list)
    weight_method_list, num_weights, max_wtname_width, nd_wm = check_weights(
            weight_method_list)

    # validating the choice and getting a callable
    summary_stats, summary_stat_names, _, _, _ = check_stat_methods(summary_stats)

    num_procs = check_num_procs(num_procs)
    pretty_print_options = (max_id_width, nd_id,
                            num_weights, max_wtname_width, nd_wm)

    # roi_labels, ctx_annot = parcellate.freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)
    uniq_rois, centroids, roi_labels = parcellate.roi_labels_centroids(atlas)

    print('\nProcessing {} features resampled to {} atlas,'
          ' smoothed at {} with node size {}'.format(base_feature_list,
                                                     atlas_name,
                                                     smoothing_param, node_size))

    if not return_results:
        if out_dir is None:
            raise ValueError('When return_results=False, out_dir must be '
                             'specified to be able to save the results.')
        if not out_dir.exists():
            out_dir.mkdir(exist_ok=True, parents=True)

    partial_func_extract = partial(per_subject_multi_edge, input_dir,
                                   base_feature_list, roi_labels, centroids,
                                   weight_method_list, summary_stats,
                                   summary_stat_names, atlas, atlas_name,
                                   smoothing_param, node_size, num_bins,
                                   edge_range_dict, out_dir, return_results,
                                   overwrite_results, pretty_print_options)
    if num_procs > 1:
        chunk_size = int(np.ceil(num_subjects / num_procs))
        with Manager():
            with Pool(processes=num_procs) as pool:
                edge_weights_list_dicts = pool.map(partial_func_extract,
                                                   subject_id_list, chunk_size)
    else:
        # reverting to sequential processing
        edge_weights_list_dicts = [partial_func_extract(subject=sub_id)
                                   for sub_id in subject_id_list]

    if return_results:
        edge_weights_all = dict()
        for combo in edge_weights_list_dicts:
            # each element from the output of the parallel loop is a dict
            # keyed by (weight_method, base_feature, subject)
            edge_weights_all.update(combo)
    else:
        edge_weights_all = None

    print('\ngraynet computation done.')
    return edge_weights_all
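

# A minimal usage sketch (not part of the module), showing how extract_multiedge
# might be invoked. The subject IDs and paths below are hypothetical; only the
# keyword names mirror the signature above, and defaults cover the rest.
#
#   from pathlib import Path
#   from graynet.multi_edge import extract_multiedge
#
#   edge_weights = extract_multiedge(
#           subject_id_list=['sub001', 'sub002'],            # hypothetical IDs
#           input_dir='/path/to/freesurfer/SUBJECTS_DIR',    # hypothetical path
#           base_feature_list=['freesurfer_thickness', 'freesurfer_curv'],
#           weight_method_list=['manhattan'],
#           summary_stats=['median'],
#           edge_range_dict={'freesurfer_thickness': (0.0, 5.0),
#                            'freesurfer_curv'     : (-0.3, 0.3)},
#           out_dir=Path('/tmp/graynet_out'),                # hypothetical path
#           return_results=True)
#   # keys of the result are (weight_method, base_feature, subject_ID) tuples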
def per_subject_multi_edge(input_dir, base_feature_list, roi_labels, centroids,
                           weight_method_list, summary_stats, summary_stat_names,
                           atlas, atlas_name, smoothing_param, node_size,
                           num_bins, edge_range_dict,
                           out_dir, return_results, overwrite_results,
                           pretty_print_options,
                           subject=None):
    # subject is purposefully left last to enable partial function creation
    """Extracts the given set of weights for one subject."""

    if subject is None:
        return

    if return_results:
        edge_weights_all = dict()
    else:
        edge_weights_all = None

    max_id_width, nd_id, num_weights, max_wtname_width, nd_wm = pretty_print_options

    for ww, weight_method in enumerate(weight_method_list):

        expt_id_multi = stamp_expt_multiedge(base_feature_list, atlas_name,
                                             smoothing_param, node_size,
                                             weight_method)
        out_path_multigraph = make_output_path_graph(out_dir, subject,
                                                     [expt_id_multi,
                                                      'multigraph'])
        # skipping the computation if the file exists already
        if not overwrite_results and isfile(out_path_multigraph) \
                and getsize(out_path_multigraph) > 0:
            print('\nMultigraph exists already at\n\t{}\n'
                  ' skipping its computation!'.format(out_path_multigraph))
            multigraph = None  # signal to re-read
        else:
            multigraph = nx.MultiGraph()

            for base_feature in base_feature_list:
                try:
                    features = import_features(input_dir,
                                               [subject, ],
                                               base_feature,
                                               fwhm=smoothing_param,
                                               atlas=atlas)
                except:
                    traceback.print_exc()
                    warnings.warn('Unable to read {} features'
                                  ' for {}\n Skipping it.'
                                  ''.format(base_feature, subject), UserWarning)
                    return

                data, rois = mask_background_roi(features[subject], roi_labels,
                                                 cfg.null_roi_name)

                # unique stamp for each subject and weight
                expt_id_single = stamp_expt_weight(base_feature, atlas_name,
                                                   smoothing_param, node_size,
                                                   weight_method)
                sys.stdout.write('\nProcessing id {:{id_width}} --'
                                 ' weight {:{wtname_width}} ({:{nd_wm}}/{:{nd_wm}})'
                                 ' :'.format(subject, weight_method,
                                             ww + 1, num_weights,
                                             nd_id=nd_id, nd_wm=nd_wm,
                                             id_width=max_id_width,
                                             wtname_width=max_wtname_width))

                # actual computation of pair-wise features
                try:
                    unigraph = hiwenet.extract(data,
                                               rois,
                                               weight_method=weight_method,
                                               num_bins=num_bins,
                                               edge_range=edge_range_dict[base_feature],
                                               return_networkx_graph=True)

                    # retrieving edge weights
                    weight_vec = np.array(list(
                            nx.get_edge_attributes(unigraph, 'weight').values()))
                    warn_nan(weight_vec)
                    if return_results:
                        edge_weights_all[(weight_method, base_feature, subject)] \
                            = weight_vec

                except (RuntimeError, RuntimeWarning) as runexc:
                    print(runexc)
                except KeyboardInterrupt:
                    print('Exiting on keyboard interrupt! \n'
                          'Abandoning the remaining processing ')
                    sys.exit(1)
                except:
                    print('Unable to extract {} weights for {} for {}'
                          ''.format(weight_method, base_feature, subject))
                    traceback.print_exc()

                print('Done.')

                # TODO consider extracting some network features upon user request
                add_nodal_positions(unigraph, centroids)
                save_per_subject_graph(unigraph, out_dir, subject, expt_id_single)

                # adding edges/weights from each feature to a multigraph;
                # this also encodes the sources
                for u, v in unigraph.edges():
                    multigraph.add_edge(u, v,
                                        weight=unigraph[u][v]['weight'],
                                        base_feature=base_feature)

            # adding position info to nodes (for visualization later)
            add_nodal_positions(multigraph, centroids)
            save_graph(multigraph, out_path_multigraph, 'multi-edge')

        for stat_func, stat_name in zip(summary_stats, summary_stat_names):
            # creating a single graph with a summary edge weight (like median)
            out_path_summary = make_output_path_graph(out_dir, subject,
                                                      [expt_id_multi, stat_name,
                                                       'multigraph'])
            if (not overwrite_results) and \
                    isfile(out_path_summary) and \
                    (getsize(out_path_summary) > 0):
                print('Summary {} of multigraph exists already at\n\t{}\n'
                      ' skipping its computation!'
                      ''.format(stat_name, out_path_summary))
            else:
                if multigraph is None:
                    multigraph = nx.read_graphml(out_path_multigraph)

                try:
                    summary_multigraph = summarize_multigraph(multigraph,
                                                              stat_func)
                    add_nodal_positions(summary_multigraph, centroids)
                    save_graph(summary_multigraph, out_path_summary,
                               '{} summary'.format(stat_name))
                except:
                    print('Summary {} could not be computed - skipping!'
                          ''.format(stat_name))
                    traceback.print_exc()

    return edge_weights_all


def summarize_multigraph(multigraph, func_summary):
    """Creates a single graph with a summary edge weight (like the median)."""

    summary_multigraph = nx.Graph()
    for u, v in multigraph.edges():
        # looping through parallel edges and obtaining their weights
        all_weights = np.array([edge_item['weight']
                                for idx, edge_item in multigraph[u][v].items()])
        # float() needed due to a graphml limitation
        summary_weight = float(func_summary(all_weights))
        summary_multigraph.add_edge(u, v, weight=summary_weight)

    return summary_multigraph


def add_nodal_positions(graph, centroids):
    """Adds the x, y, z attributes to each node in the graph."""

    # adding position info to nodes (for visualization later)
    for roi in centroids:
        graph.nodes[roi]['x'] = float(centroids[roi][0])
        graph.nodes[roi]['y'] = float(centroids[roi][1])
        graph.nodes[roi]['z'] = float(centroids[roi][2])

    return


def save_summary_graph(graph, out_dir, subject,
                       str_suffix=None,
                       summary_descr='summary'):
    """Saves the summary multi-graph to disk."""

    if out_dir is not None:
        # get outpath returned from hiwenet based on dist name and all other params
        # choose out_dir name based on dist name and all other parameters
        out_subject_dir = out_dir.joinpath(subject)
        if not out_subject_dir.exists():
            out_subject_dir.mkdir(exist_ok=True, parents=True)

        if str_suffix is not None:
            out_file_name = '{}_{}_multigraph_graynet.graphml' \
                            ''.format(str_suffix, summary_descr)
        else:
            out_file_name = '_{}_multigraph_graynet.graphml'.format(summary_descr)

        out_weights_path = out_subject_dir / out_file_name

        try:
            print(graph)
            nx.write_graphml(graph, out_weights_path, encoding='utf-8')
            print('\nSaved the summary multi-graph to \n{}'
                  ''.format(out_weights_path))
        except:
            print('\nUnable to save summary multi-graph to \n{}'
                  ''.format(out_weights_path))
            traceback.print_exc()

    return
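

# A small self-contained sketch (not part of the module) of what
# summarize_multigraph does; the ROI names and weights below are made up
# purely for illustration:
#
#   mg = nx.MultiGraph()
#   mg.add_edge('roi_1', 'roi_2', weight=0.8, base_feature='freesurfer_thickness')
#   mg.add_edge('roi_1', 'roi_2', weight=0.2, base_feature='freesurfer_curv')
#   summary = summarize_multigraph(mg, np.median)
#   print(summary['roi_1']['roi_2']['weight'])   # 0.5, the median of 0.8 and 0.2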