Source code for ckg.analytics_core.viz.wgcnaFigures

import pandas as pd
import numpy as np
import scipy as scp
from ckg.analytics_core.viz import color_list
import plotly.graph_objs as go
import plotly.subplots as tools
from ckg.analytics_core.viz import Dendrogram
from ckg.analytics_core.analytics import wgcnaAnalysis


def get_module_color_annotation(map_list, col_annotation=False, row_annotation=False, bygene=False, module_colors=None, dendrogram=None):
    """
    Convert a list of labels into colors and build Plotly heatmap traces to be used as color annotations.
    Options module_colors and dendrogram only apply when map_list is a list of experimental features
    (bygene=True).

    :param list map_list: labels to annotate. With bygene=False, labels of the form 'ME<color>' are
        colored with <color>; any other label is colored white ('#ffffff').
    :param bool col_annotation: if True, builds the annotation as a row (labels on the x axis).
    :param bool row_annotation: if True, builds the annotation as a column (labels on the y axis).
    :param bool bygene: if True, each label in map_list takes the color at the same position in
        module_colors, and the annotation is ordered by the dendrogram tick labels.
    :param list module_colors: module color name of each label in map_list (only read when bygene=True).
    :param dict dendrogram: dendrogram figure; its layout.xaxis.ticktext gives the label order
        (only read when bygene=True).
    :return: tuple (row trace, column trace) when both annotations are requested, a single
        go.Heatmap trace when only one is requested, None otherwise.

    .. note:: map_list and module_colors must have the same length.
    """
    colors_dict = color_list.make_color_dict()
    if module_colors is None:
        module_colors = []

    n_labels = len(map_list)
    # Colorscale stops are spread evenly over [0, 1]. A single label has no spacing to
    # compute (1/(n-1) would divide by zero), so it simply sits at 0.
    step = 1 / (n_labels - 1) if n_labels > 1 else 0.0

    if bygene:
        # Color names are normalized ('Light Cyan' -> 'lightcyan') before the lookup.
        normalized = [color.lower().replace(' ', '') for color in module_colors]
        color_by_label = dict(zip(map_list, normalized))
        label_colors = [colors_dict[color_by_label[label]] for label in map_list]
        labels = list(dendrogram['layout']['xaxis']['ticktext'])
    else:
        label_colors = []
        for label in map_list:
            parts = label.split('ME')
            label_colors.append(colors_dict[parts[1]] if len(parts) == 2 else '#ffffff')
        labels = map_list

    colors = []
    vals = []
    position = 0
    for label, color in zip(map_list, label_colors):
        stop = round(position, 4)
        colors.append([stop, color])
        vals.append((label, stop))
        position += step

    if len(colors) == 1:
        # A colorscale has to run from 0 to 1; close it with the same color.
        colors.append([1, colors[0][1]])

    df = pd.DataFrame([labels, [1] * len(labels)], index=['labels', 'y']).T
    df['vals'] = df['labels'].map(dict(vals))

    def _annotation(x, y, xaxis, yaxis):
        return go.Heatmap(z=df.vals, x=x, y=y, showscale=False, colorscale=colors, xaxis=xaxis, yaxis=yaxis)

    if row_annotation and col_annotation:
        return _annotation(df.y, df.labels, 'x', 'y'), _annotation(df.labels, df.y, 'x2', 'y2')
    if row_annotation:
        return _annotation(df.y, df.labels, 'x2', 'y2')
    if col_annotation:
        return _annotation(df.labels, df.y, 'x2', 'y2')
    return None
def get_heatmap(df, colorscale=None, color_missing=True):
    """
    Build a simple Plotly heatmap figure dictionary from a dataframe.

    :param df: pandas dataframe to plot; the index is used for the y axis and the columns for the x axis.
    :param list[list] colorscale: heatmap colorscale (e.g. [[0,'#67a9cf'],[0.5,'#f7f7f7'],[1,'#ef8a62']]).
        When empty or None, [[0, 'rgb(255,255,255)'], [1, 'rgb(255,51,0)']] is used.
    :param bool color_missing: if True, adds a second, uniformly grey heatmap built from
        wgcnaAnalysis.get_miss_values_df(df) (presumably marking the missing values).
    :return: dictionary with 'layout' and 'data' keys, or an empty dictionary when df is None.
    """
    if df is None:
        return {}

    palette = colorscale or [[0, 'rgb(255,255,255)'], [1, 'rgb(255,51,0)']]
    colorbar = dict(x=1, y=0, xanchor='left', yanchor='bottom', len=0.35, thickness=15)

    traces = [
        go.Heatmap(
            z=df.values.tolist(),
            y=list(df.index),
            x=list(df.columns),
            colorscale=palette,
            showscale=True,
            colorbar=colorbar,
        )
    ]

    if color_missing:
        missing = wgcnaAnalysis.get_miss_values_df(df)
        grey = [[0, 'rgb(201,201,201)'], [1, 'rgb(201,201,201)']]
        traces.append(
            go.Heatmap(
                z=missing.values.tolist(),
                y=list(df.index),
                x=list(df.columns),
                colorscale=grey,
                showscale=False,
            )
        )

    return {'layout': {'template': 'plotly_white'}, 'data': traces}
def plot_labeled_heatmap(df, textmatrix, title, colorscale=[[0, 'rgb(0,255,0)'], [0.5, 'rgb(255,255,255)'], [1, 'rgb(255,0,0)']], width=1200, height=800, row_annotation=False, col_annotation=False):
    """
    Plot a Plotly heatmap with a text label in every cell and optional module color annotations.

    :param df: pandas dataframe containing data to be plotted in the heatmap.
    :param textmatrix: pandas dataframe with the cell labels as values; it is read position by
        position, so it must have the same shape as df.
    :param str title: the title of the figure.
    :param list[list] colorscale: heatmap colorscale (e.g. [[0,'rgb(0,255,0)'],[0.5,'rgb(255,255,255)'],[1,'rgb(255,0,0)']])
    :param int width: the width of the figure.
    :param int height: the height of the figure.
    :param bool row_annotation: if True, adds a color-coded annotation built from the index of df.
    :param bool col_annotation: if True, adds a color-coded annotation laid out as a row.
    :return: dictionary with 'data' (list of traces) and 'layout' (go.Layout), or an empty
        dictionary when df is None.
    """
    if df is None:
        return {}

    figure = get_heatmap(df, colorscale=colorscale, color_missing=False)

    # The helper returns None (no annotation requested), one trace, or a (row, column) tuple.
    # Only real traces may end up in figure['data'].
    annotation_traces = get_module_color_annotation(list(df.index), row_annotation=row_annotation, col_annotation=col_annotation, bygene=False)
    if isinstance(annotation_traces, tuple):
        # NOTE(review): the layout below only reserves the narrow x2/y2 strip; confirm that
        # requesting both annotations at once renders as intended.
        figure['data'].extend(annotation_traces)
    elif annotation_traces is not None:
        figure['data'].append(annotation_traces)

    annotations = []
    for row_pos, row_values in enumerate(textmatrix.values):
        for col_pos, value in enumerate(row_values):
            annotations.append(go.layout.Annotation(text=str(value), font=dict(size=8),
                                                    x=df.columns[col_pos], y=df.index[row_pos],
                                                    xref='x', yref='y', showarrow=False))

    layout = go.Layout(
        width=width,
        height=height,
        title=title,
        # Main heatmap area.
        xaxis=dict(domain=[0.015, 1], autorange=True, showgrid=False, zeroline=False, showline=False,
                   ticks='', showticklabels=True, automargin=True, anchor='y'),
        yaxis=dict(autorange='reversed', ticklen=5, ticks='outside', tickcolor='white',
                   showticklabels=False, automargin=True, showgrid=False, anchor='x'),
        # Narrow strip at the left that holds the color annotation.
        xaxis2=dict(domain=[0, 0.01], autorange=True, showgrid=False, zeroline=False, showline=False,
                    ticks='', showticklabels=False, automargin=True, anchor='y2'),
        yaxis2=dict(autorange='reversed', showgrid=False, zeroline=False, showline=False,
                    ticks='', showticklabels=True, automargin=True, anchor='x2'),
    )

    figure['layout'] = layout
    figure['layout']['template'] = 'plotly_white'
    figure['layout'].update(annotations=annotations)

    return figure
def plot_dendrogram_guidelines(Z_tree, dendrogram):
    """
    Build the Plotly shapes for the vertical dotted guidelines drawn over a dendrogram.

    Roughly every eighth leaf position gets a guideline; dendrograms with fewer than
    eight leaves get one at every leaf position after the first.

    :param dict Z_tree: dictionary of data structures computed to render the dendrogram.
        Only the 'dcoord' key is read here; its maximum is the top of the guidelines.
    :param dendrogram: dendrogram represented as a plotly object figure; its
        layout.xaxis.tickvals give the leaf positions.
    :return: List of shape dictionaries (empty when dendrogram is None or no guideline fits).
    """
    if dendrogram is None:
        return []

    tickvals = list(dendrogram['layout']['xaxis']['tickvals'])
    # With fewer than 8 leaves the integer division yields 0, which is not a valid slice step.
    step = max(1, len(tickvals) // 8)
    positions = tickvals[step::step]
    if not positions:
        return []

    top = np.max(Z_tree['dcoord'])
    line = {'color': 'rgb(192,192,192)', 'width': 0.1, 'dash': 'dot'}

    return [{'type': 'line', 'x0': x, 'y0': 0.3, 'x1': x, 'y1': top, 'line': line} for x in positions]
def plot_intramodular_correlation(MM, FS, feature_module_df, title, width=1000, height=800):
    """
    Plot a grid of scatter plots of module membership (x) against feature significance (y),
    with one row per column of FS and one column per module, each with a fitted regression line.

    :param MM: pandas dataframe with module membership data; module columns are named 'MM<color>'.
    :param FS: pandas dataframe with feature significance data (one column per trait).
    :param feature_module_df: pandas DataFrame of experimental features and module colors, with
        columns 'name' and 'modColor' (use mode='dataframe' in get_FeaturesPerModule).
    :param str title: plot title
    :param int width: plot width
    :param int height: plot height
    :return: Plotly object figure, or an empty dictionary when MM is None.

    Example::

        plot = plot_intramodular_correlation(MM, FS, feature_module_df, title='Plot', width=1000, height=800)

    .. note:: There is a limit in the number of subplots one can make in Plotly. Only the last
        six columns of MM are plotted.
    """
    if MM is None:
        return {}

    # Imported explicitly: `import scipy` alone does not guarantee that scipy.stats is loaded.
    from scipy import stats

    # Keep the last six columns, and copy so that adding 'modColor' does not write into
    # the caller's dataframe.
    MM = MM.iloc[:, -6:].copy()
    MM['modColor'] = MM.index.map(feature_module_df.set_index('name')['modColor'].get)

    module_cols = MM.columns[MM.columns.str.startswith('MM')]
    n_rows = len(FS.columns)
    n_cols = len(MM.columns) - 1  # every column except 'modColor'

    figure = tools.make_subplots(rows=n_rows, cols=n_cols, shared_xaxes=False, shared_yaxes=False,
                                 vertical_spacing=0.015, horizontal_spacing=0.1, print_grid=False)
    figure.layout.template = 'plotly_white'
    figure.layout.update(dict(width=width, height=height, showlegend=False, title=title))

    axis_dict = {}
    # Module names label the x axes of the bottom row only.
    first_bottom_axis = n_rows * n_cols - len(module_cols)
    for i, module_col in enumerate(module_cols):
        axis_dict['xaxis{}'.format(first_bottom_axis + i + 1)] = dict(title=dict(text=module_col, font=dict(size=13)))
    # Trait names label the y axes of the first column only; at most the first three
    # words are shown, one per line.
    for a, trait in enumerate(FS.columns):
        label = '<br>'.join(trait.split(' ')[:3])
        axis_dict['yaxis{}'.format(1 + a * len(module_cols))] = dict(title=dict(text=label, font=dict(size=13)))

    annotations = []
    axis_number = 1
    for a, trait in enumerate(FS.columns):
        for i, module_col in enumerate(module_cols):
            color = module_col[2:]  # 'MMblue' -> 'blue'
            in_module = MM['modColor'] == color
            members = MM.index[in_module]
            # NOTE(review): x follows the row order of MM and y the row order of FS; the
            # points only pair up if both frames list the features in the same order.
            x = abs(MM.loc[in_module, module_col].values)
            y = abs(FS[FS.index.isin(members)][trait].values)

            figure.append_trace(go.Scattergl(x=x, y=y, text=members, mode='markers', opacity=0.7,
                                             marker={'size': 7, 'color': 'white',
                                                     'line': {'width': 1.5, 'color': color}}),
                                a + 1, i + 1)

            try:
                fit = stats.linregress(x, y)
            except ValueError:
                # Empty or degenerate module: keep the scatter, skip the fit line and label.
                fit = None

            if fit is not None:
                slope, intercept, r_value, p_value, _ = fit
                figure.append_trace(go.Scattergl(x=x, y=slope * x + intercept, mode='lines',
                                                 marker={'color': 'black'}),
                                    a + 1, i + 1)
                annotations.append(dict(x=0.7, y=0.7,
                                        xref='x{}'.format(axis_number), yref='y{}'.format(axis_number),
                                        text='R={:0.2}, p={:.0e}'.format(r_value, p_value),
                                        showarrow=False))
            axis_number += 1

    figure.layout.update(axis_dict)
    figure.layout.update(annotations=annotations)

    return figure
def _append_traces(figure, traces, row, col):
    """Add every trace in traces to the subplot cell (row, col) of figure."""
    for trace in traces:
        figure.append_trace(trace, row, col)


def _apply_dendrogram_layout(figure, layout, overrides):
    """Replace the layout of figure with layout, set the white template and apply overrides."""
    figure['layout'] = layout
    figure.layout.template = 'plotly_white'
    figure['layout'].update(overrides)


def _dendrogram_ticks(dendrogram):
    """Return the x axis tick text and tick values of the dendrogram as axis settings."""
    xaxis = dendrogram['layout']['xaxis']
    return dict(ticktext=np.array(xaxis['ticktext']), tickvals=list(xaxis['tickvals']))


def plot_complex_dendrogram(dendro_df, subplot_df, title, dendro_labels=[], distfun='euclidean', linkagefun='average', hang=0.04, subplot='module colors', subplot_colorscale=[], color_missingvals=True, row_annotation=False, col_annotation=False, width=1000, height=800):
    """
    Plot a dendrogram with a subplot below that can be a heatmap (annotated or not) or module colors.

    :param dendro_df: pandas dataframe containing data used to generate dendrogram, columns will result in dendrogram leaves.
    :param subplot_df: data used to generate the plot below the dendrogram. For subplot='heatmap' a
        pandas dataframe; for subplot='module colors' it is passed on as the module colors of the leaves.
    :param str title: the title of the figure.
    :param list dendro_labels: list of strings for dendrogram leaf nodes labels.
    :param str distfun: distance measure to be used ('euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'minkowski' or 'jaccard').
    :param str linkagefun: hierarchical/agglomeration method to be used ('single', 'complete', 'average', 'weighted', 'centroid', 'median' or 'ward').
    :param float hang: height at which the dendrogram leaves should be placed.
    :param str subplot: type of plot to be shown below the dendrogram ('module colors' or 'heatmap').
        Any other value returns an empty dictionary.
    :param list subplot_colorscale: colorscale to be used in the subplot.
    :param bool color_missingvals: if set to `True`, plots missing values as grey in the heatmap.
    :param bool row_annotation: if `True`, adds a color-coded column at the left of the heatmap.
    :param bool col_annotation: if `True`, adds a color-coded row to the heatmap.
    :param int width: the width of the figure.
    :param int height: the height of the figure.
    :return: Plotly object figure, or an empty dictionary when no dendrogram could be computed.
    """
    figure = {}
    dendro_tree = wgcnaAnalysis.get_dendrogram(dendro_df, dendro_labels, distfun=distfun, linkagefun=linkagefun, div_clusters=False)
    if dendro_tree is None:
        return figure

    dendrogram = Dendrogram.plot_dendrogram(dendro_tree, hang=hang, cutoff_line=False)
    tickvals = dendrogram['layout']['xaxis']['tickvals']

    # Base layout: dendrogram on x/y at the top, subplot on x2/y2 below. Each mode
    # overrides the parts it needs.
    layout = go.Layout(
        width=width,
        height=height,
        showlegend=False,
        title=title,
        xaxis=dict(domain=[0, 1], range=[np.min(tickvals) - 6, np.max(tickvals) + 4],
                   showgrid=False, zeroline=True, ticks='', automargin=True, anchor='y'),
        yaxis=dict(domain=[0.7, 1], autorange=True, showgrid=False, zeroline=False,
                   ticks='outside', title='Height', automargin=True, anchor='x'),
        xaxis2=dict(domain=[0, 1], autorange=True, showgrid=True, zeroline=False, ticks='',
                    showticklabels=False, automargin=True, anchor='y2'),
        yaxis2=dict(domain=[0, 0.64], autorange=True, showgrid=False, zeroline=False,
                    automargin=True, anchor='x2'),
    )

    if subplot == 'module colors':
        figure = tools.make_subplots(rows=2, cols=1, print_grid=False)
        _append_traces(figure, dendrogram['data'], 1, 1)
        shapes = plot_dendrogram_guidelines(dendro_tree, dendrogram)
        # The color strip is the whole point of this mode, so it is always requested;
        # passing col_annotation through would yield None with the default arguments.
        module_colors = get_module_color_annotation(dendro_labels, col_annotation=True, bygene=True,
                                                    module_colors=subplot_df, dendrogram=dendrogram)
        figure.append_trace(module_colors, 2, 1)
        _apply_dendrogram_layout(figure, layout, {
            'shapes': shapes,
            'xaxis': dict(showticklabels=False),
            'yaxis': dict(domain=[0.2, 1]),
            'yaxis2': dict(domain=[0, 0.19], title='Module colors', ticks='', showticklabels=False),
        })

    elif subplot == 'heatmap':
        # Series.between includes both bounds by default; the boolean `inclusive`
        # argument is rejected by recent pandas releases.
        within_unit_range = all(subplot_df[column].between(-1, 1).all() for column in subplot_df.columns)
        if within_unit_range:
            df = wgcnaAnalysis.df_sort_by_dendrogram(wgcnaAnalysis.df_sort_by_dendrogram(subplot_df, dendro_tree).T, dendro_tree)
        else:
            df = wgcnaAnalysis.get_percentiles_heatmap(subplot_df, dendro_tree, bydendro=True, bycols=False).T

        heatmap = get_heatmap(df, colorscale=subplot_colorscale, color_missing=color_missingvals)

        if row_annotation and col_annotation:
            figure = tools.make_subplots(rows=3, cols=2,
                                         specs=[[{'colspan': 2}, None], [{}, {}], [{'colspan': 2}, None]],
                                         print_grid=False)
            _append_traces(figure, dendrogram['data'], 1, 1)
            _append_traces(figure, heatmap['data'], 2, 2)
            r_annot, c_annot = get_module_color_annotation(list(df.index), row_annotation=row_annotation,
                                                           col_annotation=col_annotation, bygene=False)
            figure.append_trace(r_annot, 2, 1)
            figure.append_trace(c_annot, 3, 1)
            _apply_dendrogram_layout(figure, layout, {
                'xaxis': dict(ticks='', showticklabels=False, anchor='y'),
                'xaxis2': dict(domain=[0, 0.01], ticks='', showticklabels=False, automargin=True, anchor='y2'),
                'xaxis3': dict(domain=[0.015, 1], ticks='', showticklabels=False, automargin=True, anchor='y3'),
                'xaxis4': dict(domain=[0.015, 1], ticks='', showticklabels=True, automargin=True, anchor='y4'),
                'yaxis': dict(domain=[0.635, 1], automargin=True, anchor='x'),
                'yaxis2': dict(domain=[0.015, 0.635], autorange='reversed', ticks='', showticklabels=True, automargin=True, anchor='x2'),
                'yaxis3': dict(domain=[0.01, 0.635], autorange='reversed', ticks='', showticklabels=False, automargin=True, anchor='x3'),
                'yaxis4': dict(domain=[0, 0.01], ticks='', showticklabels=False, automargin=True, anchor='x4'),
            })

        elif row_annotation:
            figure = tools.make_subplots(rows=2, cols=2, specs=[[{'colspan': 2}, None], [{}, {}]], print_grid=False)
            _append_traces(figure, dendrogram['data'], 1, 1)
            _append_traces(figure, heatmap['data'], 2, 2)
            r_annot = get_module_color_annotation(list(df.index), row_annotation=row_annotation,
                                                  col_annotation=col_annotation, bygene=False)
            figure.append_trace(r_annot, 2, 1)
            _apply_dendrogram_layout(figure, layout, {
                'xaxis': dict(domain=[0.015, 1], automargin=True, anchor='y', **_dendrogram_ticks(dendrogram)),
                'xaxis2': dict(domain=[0, 0.010], ticks='', showticklabels=False, automargin=True, anchor='y2'),
                'xaxis3': dict(domain=[0.015, 1], ticks='', showticklabels=False, automargin=True, anchor='y3'),
                'yaxis': dict(automargin=True, anchor='x'),
                'yaxis2': dict(autorange='reversed', ticks='', showticklabels=True, automargin=True, anchor='x2'),
                'yaxis3': dict(domain=[0, 0.64], ticks='', showticklabels=False, automargin=True, anchor='x3'),
            })

        elif col_annotation:
            figure = tools.make_subplots(rows=3, cols=1, specs=[[{}], [{}], [{}]], print_grid=False)
            _append_traces(figure, dendrogram['data'], 1, 1)
            _append_traces(figure, heatmap['data'], 3, 1)
            c_annot = get_module_color_annotation(list(df.index), row_annotation=row_annotation,
                                                  col_annotation=col_annotation, bygene=False)
            figure.append_trace(c_annot, 2, 1)
            _apply_dendrogram_layout(figure, layout, {
                'xaxis': dict(automargin=True, anchor='y', **_dendrogram_ticks(dendrogram)),
                'xaxis2': dict(ticks='', showticklabels=False, automargin=True, anchor='y2'),
                'xaxis3': dict(domain=[0, 1], ticks='', showticklabels=False, automargin=True, anchor='y3'),
                'yaxis': dict(domain=[0.70, 1], automargin=True, anchor='x'),
                'yaxis2': dict(domain=[0.615, 0.625], ticks='', showticklabels=False, automargin=True, anchor='x2'),
                'yaxis3': dict(domain=[0, 0.61], autorange='reversed', ticks='', showticklabels=False, automargin=True, anchor='x3'),
            })

        else:
            figure = tools.make_subplots(rows=2, cols=1, print_grid=False)
            _append_traces(figure, dendrogram['data'], 1, 1)
            _append_traces(figure, heatmap['data'], 2, 1)
            _apply_dendrogram_layout(figure, layout, {
                'xaxis': _dendrogram_ticks(dendrogram),
                'yaxis2': dict(autorange='reversed'),
            })

    return figure