Source code for run_workflow

__all__ = ['extract', 'roiwise_stats_indiv', 'cli_run']

import argparse
import logging
import pickle
import sys
import traceback
import warnings
from functools import partial
from multiprocessing import Manager as MultiProcManager, Pool
from pathlib import Path
from sys import version_info

import hiwenet
import networkx as nx
import numpy as np

from graynet.utils import (calc_roi_statistics, check_atlas, check_num_procs,
                           check_params_single_edge, check_stat_methods,
                           check_subjects, check_weight_params, check_weights,
                           import_features, mask_background_roi, save,
                           save_per_subject_graph, save_summary_stats,
                           stamp_experiment, stamp_expt_weight, warn_nan)

if version_info.major > 2:
    from graynet import utils
    from graynet.volumetric import extract_per_subject_volumetric, volumetric_roi_info
    from graynet.parcellate import roi_labels_centroids
    from graynet import config_graynet as cfg
    from graynet import __version__
else:
    raise NotImplementedError('graynet supports only Python 3; '
                              'upgrading to Python 3.6 or higher is required.')

np.seterr(divide='ignore', invalid='ignore')


# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

def extract(subject_id_list,
            input_dir,
            base_feature=cfg.default_feature_single_edge,
            weight_method_list=cfg.default_weight_method,
            num_bins=cfg.default_num_bins,
            edge_range=cfg.default_edge_range,
            atlas=cfg.default_atlas,
            smoothing_param=cfg.default_smoothing_param,
            node_size=cfg.default_node_size,
            out_dir=None,
            return_results=False,
            num_procs=cfg.default_num_procs):
    """
    Extracts weighted networks (a matrix of pair-wise ROI distances) from gray
    matter features based on Freesurfer processing.

    Parameters
    ----------
    subject_id_list : str or list
        must be path to a file containing subject IDs, or a list of subject IDs

    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where output
        processing is stored. Or another directory with a structure that
        graynet can parse.

    base_feature : str
        Specific type of feature to read for each subject from the input
        directory.

    weight_method_list : string(s), optional
        Type of distance (or metric) to compute between the pair of histograms.

        It must be one of the following methods:

        - 'chebyshev'
        - 'chebyshev_neg'
        - 'chi_square'
        - 'correlate'
        - 'correlate_1'
        - 'cosine'
        - 'cosine_1'
        - 'cosine_2'
        - 'cosine_alt'
        - 'euclidean'
        - 'fidelity_based'
        - 'histogram_intersection'
        - 'histogram_intersection_1'
        - 'jensen_shannon'
        - 'kullback_leibler'
        - 'manhattan'
        - 'minowski'
        - 'noelle_1'
        - 'noelle_2'
        - 'noelle_3'
        - 'noelle_4'
        - 'noelle_5'
        - 'relative_bin_deviation'
        - 'relative_deviation'

        Note only the following are *metrics*:

        - 'manhattan'
        - 'minowski'
        - 'euclidean'
        - 'noelle_2'
        - 'noelle_4'
        - 'noelle_5'

        The following are *semi- or quasi-metrics*:

        - 'kullback_leibler'
        - 'jensen_shannon'
        - 'chi_square'
        - 'chebyshev'
        - 'cosine_1'
        - 'chebyshev_neg'
        - 'correlate_1'
        - 'histogram_intersection_1'
        - 'relative_deviation'
        - 'relative_bin_deviation'
        - 'noelle_1'
        - 'noelle_3'

        The following are classified as similarity functions:

        - 'histogram_intersection'
        - 'correlate'
        - 'cosine'
        - 'cosine_2'
        - 'cosine_alt'
        - 'fidelity_based'

        *Default* choice: 'manhattan'.

    num_bins : int
        Number of histogram bins to use when computing pair-wise weights.
        Default: 25

    edge_range : tuple or list
        The range of edges (two finite values) within which to build the
        histogram e.g., ``--edge_range 0 5``.
        This can be helpful (and important) to ensure correspondence across
        multiple invocations of graynet (e.g. for different subjects), in
        terms of range across all bins as well as individual bin edges.

        Default :

        - ( 0.0, 5.0) for ``freesurfer_thickness`` and
        - (-0.3, 0.3) for ``freesurfer_curv``.

    atlas : str
        Name of the atlas whose parcellation is to be used.
        Choices for cortical parcellation: ['fsaverage', 'glasser2016'].
        Volumetric whole-brain atlases will be added soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical
        features, or another relevant parameter for the chosen base_feature.
        Default: assumed as fwhm=10mm for the default feature choice
        'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches,
        depending on the type of atlas or feature. This feature is not
        implemented yet; this arg is just a placeholder to enable default
        computation.

    out_dir : Path or str, optional
        Path to output directory to store results.
        Default: None, results are returned, but not saved to disk.
        If this is None, return_results must be True.

    return_results : bool
        Flag to indicate whether to return the results.
        This flag helps to reduce the memory requirements when the number of
        nodes in a parcellation, or the number of subjects or weight methods,
        is large, as it avoids retaining results for all combinations when
        running from the command line interface (or HPC). Default: False
        If this is False, out_dir must be specified to save the results to
        disk.

    num_procs : int
        Number of parallel processes to use to speed up computation.

    Returns
    -------
    edge_weights_all : dict, None
        If return_results is True, this will be a dictionary keyed in by a
        tuple: (weight method, subject_ID).
        The value of each edge_weights_all[(weight method, subject_ID)] is a
        numpy array of length p = k*(k-1)/2, with k = number of nodes in the
        atlas parcellation. If return_results is False, this will be None,
        which is the default.
    """

    # All the checks must happen here, as this is the key function in the API
    check_params_single_edge(base_feature, input_dir, atlas, smoothing_param,
                             node_size, out_dir, return_results)
    atlas, atlas_name = check_atlas(atlas)

    subject_id_list, num_subjects, \
        max_id_width, nd_id = check_subjects(subject_id_list)

    num_bins, edge_range = check_weight_params(num_bins, edge_range)
    weight_method_list, num_weights, \
        max_wtname_width, nd_wm = check_weights(weight_method_list)

    num_procs = check_num_procs(num_procs)
    pretty_print_options = (max_id_width, nd_id, num_weights,
                            max_wtname_width, nd_wm)

    # roi_labels, ctx_annot = freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)

    print('\nProcessing {} features'.format(base_feature))

    if not return_results:
        if out_dir is None:
            raise ValueError('When return_results=False, out_dir must be '
                             'specified to be able to save the results.')
        if not out_dir.exists():
            out_dir.mkdir(exist_ok=True, parents=True)

    if base_feature in cfg.features_cortical:
        uniq_rois, centroids, roi_labels = roi_labels_centroids(atlas,
                                                                node_size)
        partial_func_extract = partial(extract_per_subject_cortical,
                                       input_dir, base_feature, roi_labels,
                                       centroids, weight_method_list, atlas,
                                       atlas_name, smoothing_param, node_size,
                                       num_bins, edge_range, out_dir,
                                       return_results, pretty_print_options)
    elif base_feature in cfg.features_volumetric:
        uniq_rois, centroids, roi_labels = volumetric_roi_info(atlas)
        partial_func_extract = partial(extract_per_subject_volumetric,
                                       input_dir, base_feature, roi_labels,
                                       centroids, weight_method_list, atlas,
                                       atlas_name, smoothing_param, node_size,
                                       num_bins, edge_range, out_dir,
                                       return_results, pretty_print_options)
    else:
        raise NotImplementedError('Chosen feature {} is not recognized as '
                                  'either cortical or volumetric! Choose one '
                                  'from the following options: {}'
                                  ''.format(base_feature,
                                            cfg.base_feature_list))

    chunk_size = int(np.ceil(num_subjects / num_procs))
    if (num_procs > 1) and (num_subjects > 10) and (chunk_size > 5):
        with MultiProcManager():
            with Pool(processes=num_procs) as pool:
                edge_wt_list_of_dicts = pool.map(partial_func_extract,
                                                 subject_id_list, chunk_size)

        # each element of the parallel-loop output is a dict
        # keyed by (weight_method, subject)
        edge_weights_all = dict()
        for combo in edge_wt_list_of_dicts:
            if combo is not None:
                edge_weights_all.update(combo)
    else:
        if num_procs > 1:
            print('info: not parallel processing due to too few subjects')
        edge_weights_all = dict()
        for subj in subject_id_list:
            ret_value = partial_func_extract(subj)
            if ret_value is not None:
                edge_weights_all.update(ret_value)

    print('\ngraynet computation done.')

    if return_results:
        return edge_weights_all
    else:
        return None
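# Example API usage (a minimal sketch, not part of graynet itself; the
# ID-list file and SUBJECTS_DIR path below are hypothetical placeholders):
#
#     from graynet.run_workflow import extract
#
#     edge_weights = extract(subject_id_list='subject_ids.txt',
#                            input_dir='/data/freesurfer/subjects',
#                            base_feature='freesurfer_thickness',
#                            weight_method_list=['manhattan'],
#                            num_bins=25,
#                            edge_range=(0.0, 5.0),
#                            atlas='fsaverage',
#                            smoothing_param=10,
#                            out_dir=None,
#                            return_results=True)
#
# With return_results=True and out_dir=None, the weights are returned as a
# dict keyed by (weight_method, subject_ID), as described in the docstring.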
def extract_per_subject_cortical(input_dir, base_feature, roi_labels,
                                 centroids, weight_method_list, atlas_spec,
                                 atlas_name, smoothing_param, node_size,
                                 num_bins, edge_range, out_dir,
                                 return_results, pretty_print_options,
                                 subject=None):
    # purposefully leaving the subject parameter last,
    # to enable partial function creation
    """Extracts the given set of weights for one subject."""

    if subject is None:
        return

    print('')

    try:
        features = import_features(input_dir, [subject, ], base_feature,
                                   fwhm=smoothing_param, atlas=atlas_spec)
    except:
        traceback.print_exc()
        warnings.warn('Unable to read {} features for {}\n Skipping it.'
                      ''.format(base_feature, subject), UserWarning)
        return

    data, rois = mask_background_roi(features[subject], roi_labels,
                                     cfg.null_roi_name)

    max_id_width, nd_id, num_weights, max_wtname_width, nd_wm = \
        pretty_print_options

    if return_results:
        edge_weights_all = dict()
    else:
        edge_weights_all = None

    for ww, weight_method in enumerate(weight_method_list):
        # unique stamp for each subject and weight
        expt_id = stamp_expt_weight(base_feature, atlas_name, smoothing_param,
                                    node_size, weight_method)
        sys.stdout.write(
            '\nProcessing {sid:{id_width}} -- weight {wm:{wtname_width}} '
            '({wc:{nd_wm}}/{nw:{nd_wm}}) :\n'
            ''.format(sid=subject, wm=weight_method, wc=ww + 1,
                      nw=num_weights, nd_id=nd_id, nd_wm=nd_wm,
                      id_width=max_id_width, wtname_width=max_wtname_width))

        # actual computation of pair-wise features
        try:
            graph = hiwenet.extract(data, rois,
                                    weight_method=weight_method,
                                    num_bins=num_bins,
                                    edge_range=edge_range,
                                    return_networkx_graph=True)

            # retrieving edge weights
            weight_vec = np.array(list(
                nx.get_edge_attributes(graph, 'weight').values()))
            warn_nan(weight_vec)

            # adding position info to nodes (for visualization later)
            for roi in centroids:
                graph.nodes[roi]['x'] = float(centroids[roi][0])
                graph.nodes[roi]['y'] = float(centroids[roi][1])
                graph.nodes[roi]['z'] = float(centroids[roi][2])

            if return_results:
                edge_weights_all[(weight_method, subject)] = weight_vec

            # saving to disk
            try:
                save(weight_vec, out_dir, subject, expt_id)
                save_per_subject_graph(graph, out_dir, subject, expt_id)
            except:
                raise IOError('Unable to save the network or vectorized '
                              'weights to:\n{}'.format(out_dir))

        except (RuntimeError, RuntimeWarning) as runexc:
            print(runexc)
        except KeyboardInterrupt:
            print('Exiting on keyboard interrupt! \n'
                  'Abandoning the remaining processing for {} weights:\n'
                  '{}.'.format(num_weights - ww, weight_method_list[ww:]))
            sys.exit(1)
        except:
            print('Unable to extract {} features for {}'
                  ''.format(weight_method, subject))
            traceback.print_exc()

    return edge_weights_all
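# The ``subject`` parameter above is deliberately last: ``functools.partial``
# can then pre-bind every other argument, and the resulting callable takes a
# single subject ID, which is exactly what ``Pool.map`` expects. A minimal
# sketch of the same pattern (``process_one`` and its arguments are
# hypothetical, for illustration only):
#
#     from functools import partial
#
#     def process_one(config, subject=None):
#         """Toy worker: receives pre-bound config plus one subject ID."""
#         return subject, config['num_bins']
#
#     worker = partial(process_one, {'num_bins': 25})
#     results = list(map(worker, ['sub-01', 'sub-02']))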
def roiwise_stats_indiv(subject_id_list,
                        input_dir,
                        base_feature=cfg.default_feature_single_edge,
                        chosen_roi_stats=cfg.default_roi_statistic,
                        atlas=cfg.default_atlas,
                        smoothing_param=cfg.default_smoothing_param,
                        node_size=cfg.default_node_size,
                        out_dir=None,
                        return_results=False):
    """
    Computes the chosen summary statistics within each ROI.
    These summary stats (such as the median) can help serve as a baseline for
    network-level values produced by graynet.

    Options for summary statistics include 'median', 'entropy', 'kurtosis' and
    any other appropriate summary statistics listed under scipy.stats:
    https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions

    Parameters
    ----------
    subject_id_list : str or list
        must be path to a file containing subject IDs, or a list of subject IDs

    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where output
        processing is stored. Or another directory with a structure that
        graynet can parse.

    base_feature : str
        Specific type of feature to read for each subject from the input
        directory.

    chosen_roi_stats : list of str or callable
        If requested, graynet will compute the chosen summary statistics (such
        as median) within each ROI of the chosen parcellation (and network
        weight computation is skipped).
        Default: 'median'. Supported summary statistics include 'median',
        'mode', 'mean', 'std', 'gmean', 'hmean', 'variation', 'entropy',
        'skew' and 'kurtosis'.

        Other appropriate summary statistics listed under scipy.stats could be
        used, by passing in a callable with their parameters encapsulated:
        https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions

        For example, if you would like to compute the 3rd k-statistic, you
        could construct a callable ``third_kstat`` and pass it in as the
        argument:

        .. code-block:: python

            third_kstat = lambda array: scipy.stats.kstat(array, n=3)
            roi_medians = roiwise_stats_indiv(subject_id_list, fs_dir,
                                              base_feature,
                                              chosen_roi_stats=third_kstat,
                                              atlas=atlas,
                                              smoothing_param=fwhm,
                                              out_dir=None,
                                              return_results=True)

        Other possible options could be a trimmed mean estimator with 5% of
        outliers removed, or the 3rd k-statistic:

        .. code-block:: python

            trimmed_mean = lambda array: scipy.stats.trim_mean(array,
                                                               proportiontocut=0.05)
            third_kstat = lambda array: scipy.stats.kstat(array, n=3)

        Note: 'hmean' requires all values to be positive.

    atlas : str
        Name of the atlas whose parcellation is to be used.
        Available choices for cortical parcellation: ['fsaverage',
        'glasser2016']. Volumetric whole-brain atlases will be added soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical
        features, or another relevant parameter for the chosen base_feature.
        Default: assumed as fwhm=10mm for the default feature choice
        'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches,
        depending on the type of atlas or feature. NOT implemented yet.

    out_dir : str, optional
        Path to output directory to store results.
        Default: None, results are returned, but not saved to disk.
        If this is None, return_results must be True.

    return_results : bool
        Flag indicating whether to keep the results to be returned to the
        caller. Helps to save memory (as it doesn't retain results for all
        subjects and stat combinations), when running from the command line
        interface (or HPC). Default: False
        If this is False, out_dir must be specified to save the results to
        disk.

    Returns
    -------
    roi_stats_all : dict, None
        If return_results is True, this will be a dictionary keyed in by
        subject_ID. The value of each key roi_stats_all[subject] is a numpy
        array of length k, with k = number of nodes in the atlas parcellation.
        If return_results is False, this will be None, which is the default.
    """

    check_params_single_edge(base_feature, input_dir, atlas, smoothing_param,
                             node_size, out_dir, return_results)
    subject_id_list, num_subjects, max_id_width, nd_id = \
        check_subjects(subject_id_list)
    stat_func_list, stat_func_names, num_stats, \
        max_stat_width, nd_st = check_stat_methods(chosen_roi_stats)

    if base_feature in cfg.features_cortical:
        uniq_rois, centroids, roi_labels = roi_labels_centroids(atlas)
        null_roi_to_be_ignored = cfg.null_roi_name
    elif base_feature in cfg.features_volumetric:
        uniq_rois, centroids, roi_labels = volumetric_roi_info(atlas)
        null_roi_to_be_ignored = cfg.null_roi_index
    else:
        raise ValueError('Unrecognized type of base_feature! Must be one of {}'
                         ''.format(cfg.base_feature_list))

    print('\nProcessing {} features resampled to {} atlas,'
          ' smoothed at {} with node size {}'.format(base_feature, atlas,
                                                     smoothing_param,
                                                     node_size))

    if return_results:
        roi_stats_all = dict()
    else:
        roi_stats_all = None
        if out_dir is None:
            raise ValueError('When return_results=False, out_dir must be '
                             'specified to be able to save the results.')
        if not out_dir.exists():
            out_dir.mkdir(exist_ok=True, parents=True)

    for sub_idx, subject in enumerate(subject_id_list):

        try:
            features = import_features(input_dir, [subject, ], base_feature,
                                       atlas=atlas, fwhm=smoothing_param)
        except:
            raise IOError('Unable to read {} features for {}\n'
                          ' Skipping it.'.format(base_feature, subject))

        data, rois = mask_background_roi(features[subject], roi_labels,
                                         null_roi_to_be_ignored)

        for ss, stat_func in enumerate(stat_func_list):
            sys.stdout.write(
                '\nProcessing id {sid:{id_width}} '
                '({sidnum:{nd_id}}/{numsub:{nd_id}}) -- '
                'statistic {stname:{stat_name_width}} '
                '({statnum:{nd_st}}/{numst:{nd_st}})'
                ' :'.format(sid=subject, sidnum=sub_idx + 1,
                            numsub=num_subjects,
                            stname=stat_func_names[ss], statnum=ss + 1,
                            numst=num_stats, id_width=max_id_width,
                            stat_name_width=max_stat_width,
                            nd_id=nd_id, nd_st=nd_st))

            try:
                roi_stats = calc_roi_statistics(data, rois, uniq_rois,
                                                stat_func)
                expt_id_no_network = stamp_experiment(base_feature,
                                                      stat_func_names[ss],
                                                      atlas, smoothing_param,
                                                      node_size)
                save_summary_stats(roi_stats, uniq_rois, stat_func_names[ss],
                                   out_dir, subject, expt_id_no_network)
                sys.stdout.write('Done.')
            except KeyboardInterrupt:
                print('Exiting on keyboard interrupt! \n'
                      'Abandoning the remaining processing for {} stats:\n'
                      '{}.'.format(num_stats - ss, stat_func_names[ss:]))
                sys.exit(1)
            except:
                traceback.print_exc()
                logging.debug('Error : unable to compute roi-wise {} for {}.'
                              ' Skipping it.'.format(stat_func_names[ss],
                                                     subject))

        if return_results:
            roi_stats_all[subject] = roi_stats

    return roi_stats_all
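# Example API usage (a minimal sketch; paths are hypothetical placeholders).
# Mirrors the docstring above: any scipy.stats function can be wrapped in a
# callable and passed as chosen_roi_stats:
#
#     import scipy.stats
#     from graynet.run_workflow import roiwise_stats_indiv
#
#     # trimmed mean with 5% of outliers removed
#     trimmed_mean = lambda array: scipy.stats.trim_mean(array,
#                                                        proportiontocut=0.05)
#     roi_stats = roiwise_stats_indiv('subject_ids.txt',
#                                     '/data/freesurfer/subjects',
#                                     base_feature='freesurfer_thickness',
#                                     chosen_roi_stats=trimmed_mean,
#                                     atlas='fsaverage',
#                                     smoothing_param=10,
#                                     out_dir=None,
#                                     return_results=True)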
def cli_run():
    """command line interface!"""

    subject_ids_path, input_dir, base_feature_list, \
        weight_method, do_multi_edge, summary_stats, multi_edge_range, \
        num_bins, edge_range, atlas, out_dir, node_size, smoothing_param, \
        roi_stats, num_procs, overwrite_results = parse_args()

    # save options to the out folder for future reference
    try:
        user_opt = [subject_ids_path, input_dir, base_feature_list,
                    weight_method, do_multi_edge, summary_stats,
                    multi_edge_range, num_bins, edge_range, atlas, out_dir,
                    node_size, smoothing_param, roi_stats, num_procs,
                    overwrite_results]
        with open(out_dir.joinpath('user_options.pkl'), 'wb') as of:
            pickle.dump(user_opt, of)
    except:  # ignore
        traceback.print_exc()

    # when run from the CLI, results will not be received by a caller,
    # so there is no point in wasting memory maintaining a very big array
    return_results = False

    if do_multi_edge:
        from graynet.multi_edge import extract_multiedge
        print('Computing multiple edges ... ')
        extract_multiedge(subject_ids_path, input_dir,
                          base_feature_list=base_feature_list,
                          weight_method_list=weight_method,
                          summary_stats=summary_stats,
                          num_bins=num_bins,
                          edge_range_dict=multi_edge_range,
                          atlas=atlas,
                          smoothing_param=smoothing_param,
                          node_size=node_size,
                          out_dir=out_dir,
                          return_results=return_results,
                          num_procs=num_procs,
                          overwrite_results=overwrite_results)
    else:
        base_feature = base_feature_list[0]
        if weight_method is not None:
            print('Computing single edge ... ')
            extract(subject_ids_path, input_dir,
                    base_feature=base_feature,
                    weight_method_list=weight_method,
                    num_bins=num_bins,
                    edge_range=edge_range,
                    atlas=atlas,
                    smoothing_param=smoothing_param,
                    node_size=node_size,
                    out_dir=out_dir,
                    return_results=return_results,
                    num_procs=num_procs)
        else:
            print('Computing ROI summary stats --'
                  ' skipping computation of any network weights.')
            roiwise_stats_indiv(subject_ids_path, input_dir, base_feature,
                                roi_stats, atlas, smoothing_param,
                                node_size, out_dir, return_results)

    return


def get_parser():
    """Method to specify arguments and defaults. """

    help_text_subject_ids = "Path to file containing list of subject IDs " \
                            "(one per line)"
    help_text_input_dir = "Path to a folder containing input data. It " \
                          "could, for example, be a Freesurfer " \
                          "SUBJECTS_DIR, if the chosen feature is from " \
                          "Freesurfer output."
    help_text_feature = "Type of feature to be used for analysis.\n\n" \
                        "Default: ``{}`` \n\nChoices: {}" \
                        "".format(cfg.default_feature_single_edge[0],
                                  ', '.join(cfg.base_feature_list))
    help_text_multi_edge = "Option to compute multiple edges between ROIs " \
                           "based on different features. Default False. " \
                           "If True, two valid features must be specified. " \
                           "Use --multi_edge_range to specify edge ranges " \
                           "for each feature to be processed."
    help_text_summary_stat = "Summary statistic [one or more] to compute " \
                             "on all the weights from multiple edges. This " \
                             "must be a string representing a method (like " \
                             "'median', 'prod' or 'max'), that is available " \
                             "as a member of numpy or scipy.stats."
    help_text_weight = "List of methods used to estimate the weight of the " \
                       "edge between the pair of nodes.\n\nDefault : {}.\n\n" \
                       "Available options:" \
                       "\n\n **histogram**-based: {}" \
                       "\n\n weights on **original** features : {}" \
                       "".format(cfg.default_weight_method[0],
                                 ', '.join(cfg.histogram_weights),
                                 ', '.join(cfg.weights_on_original_features))
    help_text_num_bins = "Number of bins used to construct the histogram " \
                         "within each ROI or group. " \
                         "Default : {}".format(cfg.default_num_bins)
    help_text_edge_range = "The range of edges (two finite values) within " \
                           "which to bin the given values e.g. " \
                           "``--edge_range 0.0 5.0``. Setting this is " \
                           "*crucial* to ensure correspondence across " \
                           "multiple invocations of graynet, for different " \
                           "subjects, in terms of range across all bins as " \
                           "well as individual bin edges.\n\n" \
                           "Default : {}, to automatically compute from the " \
                           "given values.".format(cfg.default_edge_range)
    help_text_multi_edge_range = \
        "Set of edge ranges (for each of the features) within which to bin " \
        "the given values - see above. For example, \n\n" \
        "``-f freesurfer_thickness freesurfer_curv " \
        "--edge_range 0.0 5.0 -0.3 +0.3``\n\n" \
        "will set a range of [0.0, 5.0] for thickness and [-0.3, 0.3] for " \
        "curv.\n\nDefault : {}.".format(cfg.edge_range_predefined)
    help_text_roi_stats = "Option to compute summary statistics within " \
                          "each ROI of the chosen parcellation. These " \
                          "statistics (such as the median) can serve as a " \
                          "baseline for network-level values produced by " \
                          "graynet. Options for summary statistics include " \
                          "'median', 'entropy', 'kurtosis' and any other " \
                          "appropriate summary statistics listed under " \
                          "scipy.stats: " \
                          "https://docs.scipy.org/doc/scipy/reference/stats.html" \
                          "#statistical-functions . When this option is " \
                          "chosen, network computation is not allowed. " \
                          "You need to compute networks and ROI stats " \
                          "separately."
    help_text_atlas = "Name or path to the atlas containing the " \
                      "parcellation of ROIs.\nFor cortical features, you " \
                      "can also specify the absolute path for the " \
                      "Freesurfer parcellation of that atlas. This " \
                      "directory must have the standard Freesurfer " \
                      "structure, with the following key files that must " \
                      "exist: ``label/?h.aparc.annot`` and " \
                      "``surf/?h.orig``.\n\n*Cortical* atlases supported: " \
                      "``fsaverage`` and ``glasser2016``. In addition, you " \
                      "can also specify an absolute path to the Freesurfer " \
                      "processing of any arbitrary atlas. Read these " \
                      "instructions before trying: " \
                      "https://raamana.github.io/graynet/cortical.html" \
                      "#using-a-different-atlas\n\n\n*Volumetric* atlases " \
                      "supported for CAT12 features: ``cat_aal``, " \
                      "``cat_lpba40``, and ``cat_ibsr``. In addition, you " \
                      "can also directly specify an absolute path to a " \
                      "single 3D volume. Make sure to name this file " \
                      "properly, as it would be used to encode all the " \
                      "processing, i.e. make it clean as well as fully " \
                      "reflective of the properties of the parcellation " \
                      "inside.\n\nDefault: ``{}``".format(cfg.default_atlas)
    help_text_parc_size = "Size of individual patch for the atlas " \
                          "parcellation, in terms of minimum number of " \
                          "vertices per patch. This is *ONLY* valid for " \
                          "the cortical version of graynet, and only when " \
                          "using the fsaverage atlas, which has " \
                          "precomputed parcellations for a set of " \
                          "predetermined patch sizes.\n" \
                          "\nAllowed values: (None, 250, 500, 1000, 2000, " \
                          "3000, 5000, 10000). \n" \
                          "\nDefault : {}".format(cfg.default_node_size)
    help_text_smoothing = "Smoothing parameter for feature. " \
                          "Default: FWHM of {} for Freesurfer thickness." \
                          "".format(cfg.default_smoothing_param)
    help_text_num_procs = "Number of CPUs to use in parallel to speed up " \
                          "processing. Default : {}, capping at the " \
                          "available number of CPUs in the processing " \
                          "node.".format(cfg.default_num_procs)
    help_text_overwrite_results = "Flag to request overwriting of existing " \
                                  "results, in case of reruns/failed jobs. " \
                                  "By default, if the expected output file " \
                                  "exists and is of non-zero size, its " \
                                  "computation is skipped (assuming the " \
                                  "file is complete, usable and not " \
                                  "corrupted)."

    parser = argparse.ArgumentParser(prog="graynet")

    parser.add_argument("-s", "--subject_ids_path", action="store",
                        dest="subject_ids_path", required=False, default=None,
                        help=help_text_subject_ids)

    parser.add_argument("-i", "--input_dir", action="store", dest="input_dir",
                        required=True, help=help_text_input_dir)

    parser.add_argument("-f", "--feature", action="store", dest="features",
                        nargs='*', default=cfg.default_feature_single_edge,
                        required=False, help=help_text_feature)

    parser.add_argument("-o", "--out_dir", action="store", dest="out_dir",
                        default=None, required=False,
                        help="Where to save the extracted features. ")

    method_selector = parser.add_argument_group(
        title='Type of computation',
        description='Choose one among single edge, multiedge or simply ROI '
                    'stats.')
    # method_selector = parser.add_argument_group(required=True)
    method_selector.add_argument("-w", "--weight_method", action="store",
                                 dest="weight_methods", nargs='*',
                                 default=None, required=False,
                                 help=help_text_weight)

    method_selector.add_argument("-r", "--roi_stats", action="store",
                                 dest="roi_stats", nargs='*', default=None,
                                 help=help_text_roi_stats)

    method_selector.add_argument("-m", "--do_multi_edge", action="store_true",
                                 dest="do_multi_edge", default=False,
                                 required=False, help=help_text_multi_edge)

    method_params = parser.add_argument_group(
        title='Weight parameters',
        description='Parameters relevant to histogram edge weight '
                    'calculations')
    method_params.add_argument("-b", "--num_bins", action="store",
                               dest="num_bins", default=cfg.default_num_bins,
                               required=False, help=help_text_num_bins)

    method_params.add_argument("-e", "--edge_range", action="store",
                               dest="edge_range",
                               default=cfg.default_edge_range,
                               required=False,
                               # TODO perhaps make this required, to ensure
                               #  users compute it from the entire dataset!
                               nargs=2, metavar=('min', 'max'),
                               help=help_text_edge_range)

    multiedge_args = parser.add_argument_group(
        title='Multi-edge',
        description='Parameters related to computation of multiple edges')
    multiedge_args.add_argument("-t", "--summary_stat", action="store",
                                dest="summary_stat", nargs='*',
                                default=cfg.multi_edge_summary_func_default,
                                required=False, help=help_text_summary_stat)

    multiedge_args.add_argument("-l", "--multi_edge_range", action="store",
                                dest="multi_edge_range", default=None,
                                required=False, metavar=('min max'),
                                nargs='*', help=help_text_multi_edge_range)

    atlas_params = parser.add_argument_group(
        title='Atlas',
        description="Parameters describing the atlas, its parcellation "
                    "and any smoothing of features.")
    atlas_params.add_argument("-a", "--atlas", action="store", dest="atlas",
                              default=cfg.default_atlas, required=False,
                              help=help_text_atlas)

    atlas_params.add_argument("-n", "--node_size", action="store",
                              dest="node_size",
                              default=cfg.default_node_size,
                              required=False, help=help_text_parc_size)

    atlas_params.add_argument("-p", "--smoothing_param", action="store",
                              dest="smoothing_param",
                              default=cfg.default_smoothing_param,
                              required=False, help=help_text_smoothing)

    computing_params = parser.add_argument_group(
        title='Computing',
        description='Options related to computing and parallelization.')
    computing_params.add_argument('-c', '--num_procs', action='store',
                                  dest='num_procs',
                                  default=cfg.default_num_procs,
                                  required=False, help=help_text_num_procs)

    computing_params.add_argument('-d', '--overwrite_results',
                                  action='store_true',
                                  dest='overwrite_results', required=False,
                                  help=help_text_overwrite_results)

    computing_params.add_argument('-v', '--version', action='version',
                                  version='%(prog)s {version}'.format(
                                      version=__version__))

    return parser


def parse_args():
    """Parser/validator for the cmd line args."""

    parser = get_parser()

    if len(sys.argv) < 2:
        parser.print_help()
        print('\nToo few arguments!')
        parser.exit(1)

    # parsing
    try:
        params = parser.parse_args()
    except Exception as exc:
        print(exc)
        raise ValueError('Unable to parse command-line arguments.')

    feature_list = utils.check_features(params.features)

    input_dir = Path(params.input_dir).resolve()
    if not input_dir.exists():
        raise IOError("Given input directory doesn't exist!")

    out_dir = params.out_dir
    if out_dir is not None:
        out_dir = Path(out_dir).resolve()
    else:
        out_dir = input_dir / "graynet"

    if not out_dir.exists():
        out_dir.mkdir(exist_ok=True, parents=True)

    # allowing auto population of subject IDs for a Freesurfer directory
    sub_id_list_path = params.subject_ids_path
    if sub_id_list_path is None:
        # this is allowed only when all features are Freesurfer-related
        for feat in feature_list:
            if feat not in cfg.features_freesurfer:
                raise ValueError("Path to subject ID list must be specified "
                                 "when non-Freesurfer features are being "
                                 "processed!")

        # get all IDs in the Freesurfer $SUBJECTS_DIR that are folders with
        # a surf subdirectory; their folder names serve as subject IDs
        id_list = [sub_id.name for sub_id in input_dir.iterdir()
                   if (sub_id.is_dir() and sub_id.joinpath('surf').is_dir())]
        if len(id_list) < 1:
            raise ValueError('Freesurfer folder does not have any '
                             'usable subjects: \n{}'.format(input_dir))

        # write to a file in the out folder
        try:
            sub_id_list_path = input_dir / 'id_list_freesurfer_graynet.txt'
            with open(sub_id_list_path, 'w') as idlf:
                idlf.writelines('\n'.join(id_list))
        except:
            raise IOError('Unable to write auto generated id list (n={}) to '
                          'disk to\n {}'.format(len(id_list),
                                                sub_id_list_path))
    else:
        sub_id_list_path = Path(params.subject_ids_path).resolve()
        if not sub_id_list_path.exists():
            raise IOError("Given subject IDs file doesn't exist.")

    do_multi_edge = bool(params.do_multi_edge)
    summary_stat = params.summary_stat
    # convert to a numeric array only when ranges were actually given,
    # so the `is not None` check below behaves as intended
    if params.multi_edge_range is not None:
        multi_edge_range = np.array(params.multi_edge_range, dtype=float)
    else:
        multi_edge_range = None

    multi_edge_range_out = None
    if do_multi_edge:
        # ensure at least two features
        num_features = len(feature_list)
        if num_features < 2:
            raise ValueError('To enable multi-edge computation, specify at '
                             'least two valid features.')

        if multi_edge_range is not None:
            nvals_per_feat = 2
            if len(multi_edge_range) != nvals_per_feat * num_features:
                raise ValueError(
                    'Insufficient specification of edge ranges for multiple '
                    'features!\nNeeded : {} exactly, given : {}'
                    ''.format(nvals_per_feat * num_features,
                              len(multi_edge_range)))

            indiv_ranges = np.split(multi_edge_range,
                                    range(nvals_per_feat,
                                          len(multi_edge_range),
                                          nvals_per_feat))

            multi_edge_range_out = dict()
            for ix, feat in enumerate(feature_list):
                multi_edge_range_out[feat] = indiv_ranges[ix]

        utils.check_stat_methods(summary_stat)
    else:
        summary_stat = None
        if len(feature_list) > 1:
            raise ValueError('For single edge computation, '
                             'only one feature can be specified.')

    # validating choices and doing only one of the two
    weight_methods = params.weight_methods
    roi_stats = params.roi_stats
    if weight_methods is not None:
        weight_method_list, _, _, _ = check_weights(weight_methods)
        if roi_stats is not None:
            print('ROI stats requested with network weights computation '
                  '- not allowed.')
            sys.exit(1)
        roi_stats = None
    elif roi_stats is not None:
        roi_stats, _, _, _, _ = check_stat_methods(roi_stats)
        weight_method_list = None
    else:
        raise ValueError('One of weight_method and roi_stats must be chosen.')

    if params.node_size is not None:
        node_size = int(params.node_size)
    else:
        node_size = None

    print('\nData resampled to {} atlas, '
          ' smoothed at {} with node size {}'
          ''.format(params.atlas, params.smoothing_param, params.node_size))

    atlas_spec, _ = check_atlas(params.atlas)

    # num_procs will be validated inside the functions using it.

    # TODO should we check atlas compatibility with data for two subjects
    #  chosen randomly? i.e. load data for those subjects, and check the
    #  atlas parcellation is compatible in size with the data

    return sub_id_list_path, input_dir, \
        feature_list, weight_method_list, \
        do_multi_edge, summary_stat, multi_edge_range_out, \
        params.num_bins, params.edge_range, \
        atlas_spec, out_dir, node_size, params.smoothing_param, roi_stats, \
        params.num_procs, params.overwrite_results


if __name__ == '__main__':
    cli_run()
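# Example CLI invocation (a sketch; the ID-list file and paths are
# hypothetical placeholders). The flags map onto get_parser() above:
#
#     graynet -s subject_ids.txt -i /data/freesurfer/subjects \
#             -f freesurfer_thickness -w manhattan \
#             -b 25 -e 0.0 5.0 -a fsaverage -p 10 \
#             -o /data/graynet_output -c 2
#
# Passing -r median instead of -w would compute ROI-wise summary statistics
# and skip network computation entirely.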