Source code for

import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from collections import defaultdict
from natsort import natsorted

# Optional R bridge. The WGCNA-related functions in this module use the names
# bound here (R, base, stats, WGCNA, flashClust, embedded). If rpy2 or one of the
# R packages cannot be loaded, a message is printed and those names stay
# undefined, so the module itself can still be imported.
try:
    from rpy2 import robjects as ro
    from rpy2.robjects import pandas2ri
    from rpy2.robjects.packages import importr
    from rpy2.rinterface_lib import embedded
    # sys.setrecursionlimit(10000)

    # Call R
    R = ro.r
    R('options(stringsAsFactors = FALSE)')

    # Call R packages
    base = importr('base')
    stats = importr('stats')
    WGCNA = importr('WGCNA')
    flashClust = importr('flashClust')
except ImportError:
    print("WGCNA functions will not work. Module Rpy2 not installed.")
except Exception as err:
    print("WGCNA functions will not work. Missing installation. Error: {}".format(err))

def get_data(data, drop_cols_exp=['subject', 'group', 'sample', 'index'], drop_cols_cli=['subject', 'group', 'biological_sample', 'index'], sd_cutoff=0):
    """
    This function cleans up and formats experimental and clinical data into similarly shaped dataframes.

    Every non-None dataframe is re-indexed by '<subject>_<group>' labels (naturally sorted) and
    stripped of its bookkeeping columns. The input dataframes are never modified.

    :param dict data: dictionary with processed clinical and proteomics datasets.
    :param list drop_cols_exp: list of columns to drop from processed experimental (proteomics/rna-seq/dna-seq) dataframe.
    :param list drop_cols_cli: list of columns to drop from processed clinical dataframe.
    :param float sd_cutoff: when greater than 0, experimental columns whose standard deviation is not
        above this value are removed.
    :return: Dictionary with experimental and clinical dataframes (keys are the same as in the input dictionary).
        Entries whose value is None are omitted.
    """
    wgcna_data = {}
    for name in data:
        df = data[name]
        if df is None:
            continue

        is_clinical = name == 'clinical'
        if is_clinical:
            # Non-mutating on purpose: an in-place drop_duplicates would alter the caller's dataframe.
            df = df.drop_duplicates(keep='first')

        # reset_index() returns a new frame, so the column added below never touches the input.
        df = df.reset_index()
        df['rows'] = df[['subject', 'group']].apply(lambda x: '_'.join(x), axis=1)
        df = df.set_index('rows')
        df = df.reindex(index=natsorted(df.index))
        df = df.drop(drop_cols_cli if is_clinical else drop_cols_exp, axis=1)

        # NOTE(review): the variability filter is applied to experimental datasets only;
        # confirm this matches the intended nesting of the original code.
        if not is_clinical and sd_cutoff > 0:
            df = df.loc[:, df.std() > sd_cutoff]

        wgcna_data[name] = df

    return wgcna_data
def get_dendrogram(df, labels, distfun='euclidean', linkagefun='ward', div_clusters=False, fcluster_method='distance', fcluster_cutoff=15):
    """
    Computes a distance structure from the data, runs hierarchical clustering on it and returns the dendrogram layout.

    :param df: pandas dataframe with samples/subjects as index and features as columns.
    :param list labels: labels for the leaves of the tree.
    :param str distfun: distance measure handed to R's stats.dist ('euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'minkowski' or 'jaccard'). If None, df is passed to R's stats.as_dist instead.
    :param str linkagefun: hierarchical/agglomeration method to be used ('single', 'complete', 'average', 'weighted', 'centroid', 'median' or 'ward').
    :param bool div_clusters: dividing dendrogram leaves into clusters (True or False).
    :param str fcluster_method: criterion to use in forming flat clusters.
    :param int fcluster_cutoff: maximum cophenetic distance between observations in each cluster.
    :return: Dictionary of data structures computed to render the dendrogram. Keys: 'icoords', 'dcoords', 'ivl' and 'leaves'. If div_clusters is used, a tuple is returned whose second element is a dictionary of each cluster and respective leaves. None is returned when df is None or labels is empty.
    """
    # Nothing to cluster without data or leaf labels.
    if df is None or len(labels) == 0:
        return None

    if distfun is None:
        r_dist = stats.as_dist(df)
    else:
        r_dist = stats.dist(df, method=distfun)
    dist = np.asarray(r_dist)

    Z = linkage(dist, method=linkagefun)
    Z_dendrogram = dendrogram(Z, no_plot=True, labels=labels)

    if not div_clusters:
        return Z_dendrogram

    clusters = get_clusters_elements(Z, fcluster_method, fcluster_cutoff, labels)
    return Z_dendrogram, clusters
def get_clusters_elements(linkage_matrix, fcluster_method, fcluster_cutoff, labels):
    """
    Cuts a hierarchical clustering into flat clusters (via scipy.cluster.hierarchy.fcluster) and groups the leaf labels by cluster.

    :param ndarray linkage_matrix: hierarchical clustering encoded with a linkage matrix.
    :param str fcluster_method: criterion to use in forming flat clusters ('inconsistent', 'distance', 'maxclust', 'monocrit', 'maxclust_monocrit').
    :param float fcluster_cutoff: threshold handed to fcluster for the chosen criterion.
    :param list labels: labels for the leaves of the dendrogram, in the order of the original observations.
    :return: A dictionary (defaultdict of lists) where keys are the cluster numbers and values are the dendrogram leaves.
    """
    assignments = fcluster(linkage_matrix, t=fcluster_cutoff, criterion=fcluster_method)

    leaves_by_cluster = defaultdict(list)
    for cluster_id, leaf in zip(assignments, labels):
        leaves_by_cluster[cluster_id].append(leaf)

    return leaves_by_cluster
def filter_df_by_cluster(df, clusters, number):
    """
    Keeps only the rows whose index label belongs to the requested cluster.

    :param df: pandas dataframe with samples/subjects as index and features as columns.
    :param dict clusters: clusters dictionary from get_dendrogram function if div_clusters option was True.
    :param int number: cluster number (key).
    :return: Pandas dataframe with all the features (columns) and samples/subjects belonging to the defined cluster (index).
    """
    members = clusters[number]
    in_cluster = df.index.isin(members)
    return df.loc[in_cluster]
def df_sort_by_dendrogram(df, Z_dendrogram):
    """
    Returns a copy of the dataframe with its rows ordered like the dendrogram leaves.

    The index is turned into a categorical index whose category order is the dendrogram
    leaf order ('ivl'), and the rows are then sorted by that index.

    :param df: pandas dataframe with the labels to be reordered as index.
    :param dict Z_dendrogram: dictionary of data structures computed to render the dendrogram. Keys: 'icoords', 'dcoords', 'ivl' and 'leaves'.
    :return: Reordered pandas dataframe.
    """
    leaf_order = Z_dendrogram['ivl']
    ordered = df.copy()
    ordered.index = pd.CategoricalIndex(ordered.index, categories=leaf_order)
    return ordered.sort_index(level=0)
def get_percentiles_heatmap(df, Z_dendrogram, bydendro= True, bycols=False):
    """
    This function transforms the absolute values in each row or column (option 'bycols') into relative values.

    Each value is min-max scaled to [0, 1] and then pulled slightly towards 0.5
    (pct - (pct - 0.5) / 40) so that the extremes do not sit on the colormap endpoints.
    Missing values are ignored when computing the minimum and maximum and stay missing in the result.

    :param df: pandas dataframe with samples/subjects as index and features as columns.
    :param dict Z_dendrogram: dictionary of data structures computed to render the dendrogram. Keys: 'icoords', 'dcoords', 'ivl' and 'leaves'. Only read when bydendro is True.
    :param bool bydendro: if labels should be ordered according to dendrogram list of leaf nodes labels set to True, otherwise set to False.
    :param bool bycols: if False, each column (feature) is scaled using its own minimum and maximum over all rows. If True, each row (sample) is scaled using its own minimum and maximum over all columns.
    :return: Pandas dataframe of floats with the same index and columns as the (optionally reordered) input.
    """
    df2 = df_sort_by_dendrogram(df, Z_dendrogram) if bydendro else df

    values = np.asarray(df2, dtype=float)
    if values.size == 0:
        # Nothing to scale; reductions over an empty array would raise.
        return pd.DataFrame(index=df2.index, columns=df2.columns)

    # axis=1 reduces over the columns of every row, axis=0 over the rows of every column.
    axis = 1 if bycols else 0
    lowest = np.nanmin(values, axis=axis, keepdims=True)
    highest = np.nanmax(values, axis=axis, keepdims=True)

    pct = (values - lowest) / (highest - lowest)
    pct = pct - (pct - 0.5) / 40  # have to rescale it to account for endpoints of cmaps

    return pd.DataFrame(pct, index=df2.index, columns=df2.columns)
def get_miss_values_df(data):
    """
    Builds a mask-like dataframe so missing values can be plotted in a heatmap with a specific color.

    :param data: pandas dataframe.
    :return: Pandas dataframe where originally missing values are marked with 1 and originally valid values are NaN.
    """
    missing_flags = data.isnull().astype('int')
    # Zero marks an originally valid value; blank those out so only the missing cells carry a value.
    return missing_flags.replace(0, np.nan)
def paste_matrices(matrix1, matrix2, rows, cols):
    """
    Takes two matrices with analogous shapes and combines each value of matrix 1 with the corresponding
    value of matrix 2 into one text cell ('<value1><br><value2>'), returning a single pandas dataframe.

    :param ndarray matrix1: input 1, rendered with two significant digits.
    :param ndarray matrix2: input 2, rendered in scientific notation without decimals.
    :param rows: labels used as index of the resulting dataframe.
    :param cols: labels used as columns of the resulting dataframe.
    :return: Pandas dataframe.
    """
    cells = [
        '{:0.2}<br>{:.0e}'.format(left, right)
        for row_left, row_right in zip(matrix1, matrix2)
        for left, right in zip(row_left, row_right)
    ]
    n_rows, n_cols = matrix1.shape[0], matrix1.shape[1]
    grid = np.array(cells).reshape(n_rows, n_cols)
    return pd.DataFrame(grid, index=rows, columns=cols)
def cutreeDynamic(distmatrix, linkagefun='average', minModuleSize=50, method='hybrid', deepSplit=2, pamRespectsDendro=False, distfun=None):
    """
    Python entry point to the R cutreeDynamic function, providing access to methods of adaptive branch pruning
    of hierarchical clustering dendrograms. The dendrogram is built in R with flashClust on as.dist(distmatrix).

    :param distmatrix: dissimilarity matrix (e.g. pandas dataframe) handed to R.
    :param str linkagefun: hierarchical/agglomeration method to be used ('single', 'complete', 'average', 'weighted', 'centroid', 'median' or 'ward').
    :param int minModuleSize: minimum module size.
    :param str method: method to use ('hybrid' or 'tree').
    :param int deepSplit: provides a rough control over sensitivity to cluster splitting, the higher the value (with 'hybrid' method) or if True (with 'tree' method), the more and smaller modules.
    :param bool pamRespectsDendro: only used for method 'hybrid'. Objects and small modules will only be assigned to modules that belong to the same branch in the dendrogram structure.
    :param str distfun: accepted for interface compatibility; not used by this function.
    :return: Numpy array of numerical labels giving assignment of objects to modules. Unassigned objects are labeled 0, the largest module has label 1, next largest 2 etc.
    """
    # Define the helper on the R side; evaluating the assignment yields the R function object.
    r_clusters = R(''' clusters <- function(distmatrix, linkagefun, method, minModuleSize, deepSplit, pamRespectsDendro) { cutreeDynamic(dendro=flashClust(as.dist(distmatrix),method=linkagefun), distM=distmatrix, method=method, deepSplit=deepSplit, pamRespectsDendro=pamRespectsDendro, minClusterSize=minModuleSize)} ''')

    module_labels = r_clusters(
        distmatrix,
        linkagefun=linkagefun,
        method=method,
        minModuleSize=minModuleSize,
        deepSplit=deepSplit,
        pamRespectsDendro=pamRespectsDendro,
    )
    return np.array(module_labels)
def build_network(data, softPower=6, networkType='unsigned', linkagefun='average', method='hybrid', minModuleSize=50, deepSplit=2, pamRespectsDendro=False, merge_modules=True, MEDissThres=0.4, verbose=0):
    """
    Weighted gene network construction and module detection. Calculates co-expression similarity and adjacency,
    topological overlap matrix (TOM) and clusters features in modules.

    :param data: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param int softPower: soft-thresholding power.
    :param str networkType: network type ('unsigned', 'signed', 'signed hybrid', 'distance').
    :param str linkagefun: hierarchical/agglomeration method to be used ('single', 'complete', 'average', 'weighted', 'centroid', 'median' or 'ward').
    :param str method: method to use ('hybrid' or 'tree').
    :param int minModuleSize: minimum module size.
    :param int deepSplit: provides a rough control over sensitivity to cluster splitting, the higher the value (with 'hybrid' method) or if True (with 'tree' method), the more and smaller modules.
    :param bool pamRespectsDendro: only used for method 'hybrid'. Objects and small modules will only be assigned to modules that belong to the same branch in the dendrogram structure.
    :param bool merge_modules: if True, very similar modules are merged.
    :param float MEDissThres: maximum dissimilarity (i.e., 1-correlation) that qualifies modules for merging.
    :param int verbose: integer level of verbosity. Zero means silent, higher values make the output progressively more and more verbose.
    :return: Tuple with TOM dissimilarity pandas dataframe, numpy array with module colors per experimental feature.
    """
    feature_names = data.columns

    # Adjacency -> topological overlap (TOM) -> dissimilarity (1 - TOM), labelled by feature on both axes.
    adjacency = WGCNA.adjacency(data, power=softPower, type=networkType)
    TOM = WGCNA.TOMsimilarity(adjacency, verbose = verbose)
    dissTOM = pd.DataFrame(R("1") - TOM)
    dissTOM.columns = feature_names
    dissTOM.index = feature_names

    # Identify co-expression modules on the dissimilarity matrix.
    moduleColors = identify_module_colors(
        dissTOM,
        linkagefun=linkagefun,
        method=method,
        minModuleSize=minModuleSize,
        deepSplit=deepSplit,
        pamRespectsDendro=pamRespectsDendro,
    )

    # Merge modules whose expression profiles are very similar.
    # The explicit comparison is kept so that only values equal to True trigger merging.
    if merge_modules == True:
        _, moduleColors = merge_similar_modules(data, moduleColors, MEDissThres=MEDissThres, verbose=verbose)

    return dissTOM, moduleColors
def pick_softThreshold(data, RsquaredCut=0.8, networkType='unsigned', verbose=0):
    """
    Analysis of scale free topology for multiple soft thresholding powers (1 to 19). Aids the user in choosing a proper
    soft-thresholding power for network construction.

    :param data: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param float RsquaredCut: desired minimum scale free topology fitting index R^2.
    :param str networkType: network type ('unsigned', 'signed', 'signed hybrid', 'distance').
    :param int verbose: integer level of verbosity. Zero means silent, higher values make the output progressively more and more verbose.
    :return: Estimated appropriate soft-thresholding power: the 'powerEstimate' entry reported by WGCNA's pickSoftThreshold.
    :rtype: int
    """
    candidate_powers = np.arange(1, 20)
    sft = WGCNA.pickSoftThreshold(
        data,
        RsquaredCut=RsquaredCut,
        powerVector=candidate_powers,
        networkType=networkType,
        verbose=verbose,
    )
    estimate = sft.rx2('powerEstimate')
    return estimate[0]
def identify_module_colors(matrix, linkagefun='average', method='hybrid', minModuleSize=30, deepSplit=2, pamRespectsDendro=False):
    """
    Identifies co-expression modules and converts the numeric labels into colors.

    :param matrix: dissimilarity structure as produced by R.stats dist.
    :param str linkagefun: hierarchical/agglomeration method forwarded to cutreeDynamic.
    :param str method: method forwarded to cutreeDynamic ('hybrid' or 'tree').
    :param int minModuleSize: minimum module size.
    :param int deepSplit: provides a rough control over sensitivity to cluster splitting, the higher the value (with 'hybrid' method) or if True (with 'tree' method), the more and smaller modules.
    :param bool pamRespectsDendro: only used for method 'hybrid'. Objects and small modules will only be assigned to modules that belong to the same branch in the dendrogram structure.
    :return: Numpy array of strings with module color of each experimental feature.
    """
    module_labels = cutreeDynamic(
        matrix,
        linkagefun=linkagefun,
        method=method,
        minModuleSize=minModuleSize,
        deepSplit=deepSplit,
        pamRespectsDendro=pamRespectsDendro,
    )
    module_colors = WGCNA.labels2colors(module_labels)
    return np.array(module_colors)
def calculate_module_eigengenes(data, modColors, softPower=6, dissimilarity=True):
    """
    Calculates modules eigengenes to quantify co-expression similarity of entire modules.

    :param data: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param ndarray modColors: array (numeric, character or a factor) attributing module colors to each feature in the experimental dataframe.
    :param int softPower: soft-thresholding power.
    :param bool dissimilarity: if True, the dissimilarity (1 - correlation) of the module eigengenes is also calculated.
    :return: Tuple (MEs, MEDiss). MEs holds the ordered module eigengenes; MEDiss holds the eigengene dissimilarity
        and is an empty dataframe when dissimilarity is False. If R raises a runtime error, the error is printed
        and whatever was computed so far is returned (empty dataframes by default).
    """
    MEs = pd.DataFrame()
    MEDiss = pd.DataFrame()
    try:
        MEList = WGCNA.moduleEigengenes(data, modColors, softPower=softPower)
        MEs = WGCNA.orderMEs(MEList.rx2('eigengenes'), verbose=0)
        if dissimilarity:
            MEcor = WGCNA.cor(MEs, verbose=0)
            # Label the correlation matrix the same way get_EigengenesTrait_correlation does.
            # The previous R_wrapper helper is not imported by this module.
            # NOTE(review): assumes R results arrive converted (MEs exposes .columns) - confirm.
            MEcor = pd.DataFrame(MEcor, index=MEs.columns, columns=MEs.columns)
            MEDiss = 1 - MEcor
    except embedded.RRuntimeError as err:
        print(err)

    return MEs, MEDiss
def merge_similar_modules(data, modColors, MEDissThres=0.4, verbose=0):
    """
    Merges modules in co-expression network that are too close as measured by the correlation of their eigengenes.

    :param data: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param ndarray modColors: array (numeric, character or a factor) attributing module colors to each feature in the experimental dataframe.
    :param float MEDissThres: maximum dissimilarity (i.e., 1-correlation) that qualifies modules for merging.
    :param int verbose: integer level of verbosity. Zero means silent, higher values make the output progressively more and more verbose.
    :return: Tuple containing pandas dataframe with eigengenes of the new merged modules, and array with module colors of each experimental feature. If R raises a runtime error, the error is printed and the defaults (empty dataframe, empty list) are returned for whatever was not yet computed.
    """
    mergedMEs, mergedColors = pd.DataFrame(), []
    try:
        merged = WGCNA.mergeCloseModules(data, modColors, cutHeight=MEDissThres, verbose=verbose)
        # Colors are read first, then eigengenes, so a failure on the second keeps the first.
        mergedColors = merged.rx2('colors')
        mergedMEs = merged.rx2('newMEs')
    except embedded.RRuntimeError as err:
        print(err)

    return mergedMEs, mergedColors
def calculate_ModuleTrait_correlation(df_exp, df_traits, MEs):
    """
    Correlates eigengenes with external traits in order to identify the most significant module-trait associations.

    The trait dataframe is not modified: a copy with R-safe column names is sent to R, and the results are
    labelled with the original trait names.

    :param df_exp: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param df_traits: pandas dataframe containing clinical data, with samples/subjects as rows and clinical traits as columns.
    :param MEs: pandas dataframe with module eigengenes.
    :return: Tuple with two pandas dataframes, first the correlation between all module eigengenes and all clinical traits, second a dataframe with concatenated correlation and p-value used for heatmap annotation. Both are None when MEs and df_traits share no index labels.
    """
    # NOTE(review): p-values use the sample count of df_exp, while the correlation only uses
    # the samples shared by MEs and df_traits - confirm this is intended.
    nSamples = len(df_exp.index)
    moduleTraitCor = None
    textMatrix = None

    trait_names = df_traits.columns

    # Work on a copy so the caller's dataframe keeps its column names.
    traits_r = df_traits.copy()
    safe_names = trait_names
    for char, token in ((' ', 'space'), ('(', 'parentheses1'), (')', 'parentheses2')):
        safe_names = safe_names.str.replace(char, token, regex=False)
    traits_r.columns = safe_names

    common = list(set(MEs.index).intersection(traits_r.index))
    if common:
        moduleTraitCor_r = WGCNA.cor(MEs.loc[common, :], traits_r.loc[common, :], use='p', verbose=0)
        moduleTraitPvalue_r = WGCNA.corPvalueStudent(moduleTraitCor_r, nSamples)

        # Results are labelled by position, so the original trait names can be used directly
        # instead of reversing the placeholder substitution.
        textMatrix = paste_matrices(moduleTraitCor_r, moduleTraitPvalue_r, MEs.columns, trait_names)
        moduleTraitCor = pd.DataFrame(moduleTraitCor_r, index=MEs.columns, columns=trait_names)

    return moduleTraitCor, textMatrix
def calculate_ModuleMembership(data, MEs):
    """
    For each module, calculates the correlation of the module eigengene and the feature expression profile
    (quantitative measure of module membership (MM)).

    :param data: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param MEs: pandas dataframe with module eigengenes. The first two characters of every column name are stripped to obtain the module label.
    :return: Tuple with two pandas dataframes, one with module membership correlations (columns 'MM<module>') and another with p-values (columns 'p.MM<module>'), both indexed by the original feature names.
    """
    nSamples = len(data.index)
    feature_names = data.columns

    # R receives a copy in which '~' is replaced by a placeholder; the input keeps its names.
    data_r = data.copy()
    data_r.columns = feature_names.str.replace('~', 'dash', regex=False)

    modLabels = [name[2:] for name in MEs.columns]

    FeatureModuleMembership = base.as_data_frame(WGCNA.cor(data_r, MEs, use='p', verbose=0))
    MMPvalue = base.as_data_frame(WGCNA.corPvalueStudent(base.as_matrix(FeatureModuleMembership), nSamples))

    # Rows follow the feature order of `data`, so the original names are assigned directly
    # rather than reversing the placeholder substitution.
    FeatureModuleMembership.columns = ['MM' + str(label) for label in modLabels]
    FeatureModuleMembership.index = feature_names
    MMPvalue.columns = ['p.MM' + str(label) for label in modLabels]
    MMPvalue.index = feature_names

    return FeatureModuleMembership, MMPvalue
def calculate_FeatureTraitSignificance(df_exp, df_traits):
    """
    Quantifies associations of individual experimental features with the measured clinical traits, by correlating
    every feature with every trait (Feature Significance, FS).

    :param df_exp: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param df_traits: pandas dataframe containing clinical data, with samples/subjects as rows and clinical traits as columns.
    :return: Tuple with two pandas dataframes, one with feature significance correlations (columns 'GS.<trait>') and another with p-values (columns 'p.GS.<trait>'), both indexed by the original feature names.
    """
    # NOTE(review): p-values use the sample count of df_exp, while the correlation only uses
    # the samples shared by both dataframes - confirm this is intended.
    nSamples = len(df_exp.index)
    feature_names = df_exp.columns
    trait_names = df_traits.columns

    # Copies with R-safe column names; the inputs are left untouched.
    df_exp_r = df_exp.copy()
    df_exp_r.columns = feature_names.str.replace('~', 'dash', regex=False)

    df_cli_r = df_traits.copy()
    safe_traits = trait_names
    for char, token in ((' ', 'space'), ('(', 'parentheses1'), (')', 'parentheses2')):
        safe_traits = safe_traits.str.replace(char, token, regex=False)
    df_cli_r.columns = safe_traits

    common = list(set(df_exp_r.index).intersection(df_cli_r.index))

    FeatureTraitSignificance = base.as_data_frame(WGCNA.cor(df_exp_r.loc[common, :], df_cli_r.loc[common, :], use='p', verbose=0))
    FSPvalue = base.as_data_frame(WGCNA.corPvalueStudent(base.as_matrix(FeatureTraitSignificance), nSamples))

    # Labels are assigned by position from the original names. The p-value columns keep
    # their 'p.GS.' prefix, matching the 'p.MM' convention of calculate_ModuleMembership.
    FeatureTraitSignificance.columns = ['GS.' + str(col) for col in trait_names]
    FeatureTraitSignificance.index = feature_names
    FSPvalue.columns = ['p.GS.' + str(col) for col in trait_names]
    FSPvalue.index = feature_names

    return FeatureTraitSignificance, FSPvalue
def get_FeaturesPerModule(data, modColors, mode='dictionary'):
    """
    Groups all experimental features by the co-expression module they belong to.

    :param data: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param ndarray modColors: array (numeric, character or a factor) attributing module colors to each feature in the experimental dataframe.
    :param str mode: type of the value returned by the function ('dictionary' or 'dataframe').
    :return: In 'dictionary' mode, a dictionary mapping each module color to the list of its features.
        In 'dataframe' mode, a dataframe with columns 'name' (feature) and 'modColor'.
    :raises ValueError: if mode is neither 'dictionary' nor 'dataframe'.
    """
    if mode == 'dataframe':
        # Going through a dict keeps one row per distinct feature name.
        color_by_feature = dict(zip(data.columns, modColors))
        return pd.DataFrame(list(color_by_feature.items()), columns=['name', 'modColor'])

    if mode == 'dictionary':
        features_by_color = defaultdict(list)
        for color, feature in zip(modColors, data.columns):
            features_by_color[color].append(feature)
        # Return a plain dict so that unknown colors raise KeyError instead of creating entries.
        return dict(features_by_color)

    raise ValueError("mode must be 'dictionary' or 'dataframe', got {!r}".format(mode))
def get_ModuleFeatures(data, modColors, modules=[]):
    """
    Returns the experimental features clustered in the requested co-expression modules.

    :param data: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param ndarray modColors: array (numeric, character or a factor) attributing module colors to each feature in the experimental dataframe.
    :param list modules: list of module colors of interest.
    :return: List of lists with experimental features in each selected module, in the order of `modules`.
    """
    features_by_module = get_FeaturesPerModule(data, modColors, mode='dictionary')

    selected = []
    for module in modules:
        selected.append(features_by_module[module])
    return selected
def get_EigengenesTrait_correlation(MEs, data):
    """
    Eigengenes are used as representative profiles of the co-expression modules, and correlation between them is
    used to quantify module similarity. Clinical traits are added to the eigengenes to see how the traits fit into
    the eigengene network.

    :param MEs: pandas dataframe with module eigengenes.
    :param data: pandas dataframe containing clinical data, with samples/subjects as rows and clinical traits as columns.
    :return: Tuple (METDiss, METcor): the dissimilarity (1 - correlation) and the correlation between all
        eigengenes and traits, as pandas dataframes. When MEs and data share no index labels, METDiss is an
        empty dataframe and METcor is 0.
    """
    METDiss = pd.DataFrame()
    METcor = 0

    placeholders = ((' ', 'space'), ('(', 'parentheses1'), (')', 'parentheses2'), ('/', 'slash'))

    # R receives a copy with placeholder tokens instead of the problematic characters.
    original_names = data.columns
    safe_names = original_names
    for char, token in placeholders:
        safe_names = safe_names.str.replace(char, token, regex=False)
    df_traits_r = data.copy()
    df_traits_r.columns = safe_names

    # Exact mapping back to the caller's names, so that names which already contain a
    # placeholder word (e.g. 'space') are restored unchanged.
    restore = dict(zip(safe_names, original_names))

    def _restore_label(label):
        if label in restore:
            return restore[label]
        # Fallback for labels not produced by the mapping above: reverse the substitution.
        for char, token in placeholders:
            label = label.replace(token, char)
        return label

    common = list(set(MEs.index).intersection(df_traits_r.index))
    if common:
        MET = WGCNA.orderMEs(base.cbind(MEs.loc[common, :], df_traits_r.loc[common, :]), verbose=0)
        METcor = WGCNA.cor(MET, use='p', verbose=0)
        labels = [_restore_label(name) for name in MET.columns]
        METcor = pd.DataFrame(METcor, index=labels, columns=labels)
        METDiss = 1 - METcor

    return METDiss, METcor