# Source code for ckg.analytics_core.viz.wgcnaFigures

import pandas as pd
import numpy as np
import scipy as scp
from ckg.analytics_core.viz import color_list
import plotly.graph_objs as go
import plotly.subplots as tools
from ckg.analytics_core.viz import Dendrogram
from ckg.analytics_core.analytics import wgcnaAnalysis


def get_module_color_annotation(map_list, col_annotation=False, row_annotation=False, bygene=False, module_colors=[], dendrogram=[]):
    """
    Convert a list of labels into color-coded plotly heatmap trace(s) to be used as row and/or column annotations.

    Every entry of map_list is assigned an evenly spaced value in [0, 1] and a color; the
    (value, color) pairs form the colorscale of the returned heatmap(s).
    Options module_colors and dendrogram only apply when bygene is True, i.e. when map_list is a
    list of experimental features used in module eigengenes calculation.

    :param list map_list: dendrogram leaf labels (bygene=True) or module names such as 'MEblue' (bygene=False). \
                        With bygene=False, names that do not split into exactly two parts on 'ME' are colored white.
    :param bool col_annotation: if True, returns the annotation laid out as a row (labels on the x axis).
    :param bool row_annotation: if True, returns the annotation laid out as a column (labels on the y axis).
    :param bool bygene: if True, colors are looked up per feature in module_colors and the annotation is ordered \
                        by the dendrogram tick labels.
    :param list module_colors: module color of each entry in map_list (only read when bygene is True). \
                        Not mutated by this function.
    :param dict dendrogram: dendrogram represented as a plotly object figure; its \
                        ['layout']['xaxis']['ticktext'] is read when bygene is True.
    :return: tuple (row annotation, column annotation) if both flags are True, a single plotly Heatmap if only \
            one flag is True, None if neither flag is True.

    .. note:: map_list and module_colors must have the same length.
    """
    colors_dict = color_list.make_color_dict()

    n_items = len(map_list)
    # Stops are spread evenly over [0, 1]. With a single item there is no interval to split,
    # so the divisor falls back to 1 instead of raising ZeroDivisionError.
    divisor = n_items - 1 if n_items > 1 else 1

    if bygene:
        # Annotation for experimental features shown as dendrogram leaves
        cleaned_colors = [c.lower().replace(' ', '') for c in module_colors]
        gene_colors = dict(zip(map_list, cleaned_colors))
        item_colors = [colors_dict[gene_colors[item]] for item in map_list]
        labels = list(dendrogram['layout']['xaxis']['ticktext'])
    else:
        # Annotation for co-expression module names ('ME<color>')
        item_colors = []
        for item in map_list:
            parts = item.split('ME')
            item_colors.append(colors_dict[parts[1]] if len(parts) == 2 else '#ffffff')
        labels = map_list

    stops = [round(idx / divisor, 4) for idx in range(n_items)]
    colors = [[stop, color] for stop, color in zip(stops, item_colors)]
    if n_items == 1:
        # A colorscale has to span 0..1, so repeat the only color at the upper end.
        colors.append([1, item_colors[0]])
    vals = dict(zip(map_list, stops))

    df = pd.DataFrame([labels, [1]*len(labels)], index=['labels', 'y']).T
    df['vals'] = df['labels'].map(vals)

    if row_annotation and col_annotation:
        r_annot = go.Heatmap(z=df.vals, x=df.y, y=df.labels, showscale=False, colorscale=colors, xaxis='x', yaxis='y')
        c_annot = go.Heatmap(z=df.vals, x=df.labels, y=df.y, showscale=False, colorscale=colors, xaxis='x2', yaxis='y2')
        return r_annot, c_annot
    if row_annotation:
        return go.Heatmap(z=df.vals, x=df.y, y=df.labels, showscale=False, colorscale=colors, xaxis='x2', yaxis='y2')
    if col_annotation:
        return go.Heatmap(z=df.vals, x=df.labels, y=df.y, showscale=False, colorscale=colors, xaxis='x2', yaxis='y2')

    return None


def get_heatmap(df, colorscale=None, color_missing=True):
    """
    This function plots a simple Plotly heatmap.

    :param df: pandas dataframe containing experimental data, with samples/subjects as rows and features as columns.
    :param list[list] colorscale: heatmap colorscale (e.g. [[0,'#67a9cf'],[0.5,'#f7f7f7'],[1,'#ef8a62']]). If colorscale is not defined (None or empty), will take [[0, 'rgb(255,255,255)'], [1, 'rgb(255,51,0)']] as default.
    :param bool color_missing: if set to True, adds a second, uniformly grey heatmap trace built from \
                                wgcnaAnalysis.get_miss_values_df(df) on top of the data heatmap.
    :return: dictionary with keys 'layout' and 'data' (list of plotly Heatmap traces), or an empty dictionary if df is None.
    """
    if df is None:
        return {}

    colors = colorscale if colorscale else [[0, 'rgb(255,255,255)'], [1, 'rgb(255,51,0)']]
    row_labels = list(df.index)
    col_labels = list(df.columns)

    traces = [go.Heatmap(z=df.values.tolist(), y=row_labels, x=col_labels,
                         colorscale=colors, showscale=True,
                         colorbar=dict(x=1, y=0, xanchor='left', yanchor='bottom', len=0.35, thickness=15))]

    if color_missing:
        df_missing = wgcnaAnalysis.get_miss_values_df(df)
        # Single-color scale: every value present in df_missing is drawn in the same grey.
        traces.append(go.Heatmap(z=df_missing.values.tolist(),
                                 y=row_labels,
                                 x=col_labels,
                                 colorscale=[[0, 'rgb(201,201,201)'], [1, 'rgb(201,201,201)']],
                                 showscale=False))

    return {'layout': {'template': 'plotly_white'}, 'data': traces}


def plot_labeled_heatmap(df, textmatrix, title, colorscale=[[0, 'rgb(0,255,0)'], [0.5, 'rgb(255,255,255)'], [1, 'rgb(255,0,0)']], width=1200, height=800, row_annotation=False, col_annotation=False):
    """
    This function plots a simple Plotly heatmap with column and/or row annotations and heatmap annotations.

    :param df: pandas dataframe containing data to be plotted in the heatmap.
    :param textmatrix: pandas dataframe with heatmap annotations as values; cell [n][m] is written on top of \
                        the heatmap cell at row df.index[n] and column df.columns[m].
    :param str title: the title of the figure.
    :param list[list] colorscale: heatmap colorscale (e.g. [[0,'rgb(0,255,0)'],[0.5,'rgb(255,255,255)'],[1,'rgb(255,0,0)']]). \
                        The default list is only read, never modified.
    :param int width: the width of the figure.
    :param int height: the height of the figure.
    :param bool row_annotation: if True, adds a color-coded column at the left of the heatmap.
    :param bool col_annotation: if True, adds a color-coded row at the bottom of the heatmap.
    :return: Plotly object figure (dictionary with 'data' and 'layout'), or an empty dictionary if df is None.

    .. note:: The layout only defines two axis pairs. NOTE(review): when both row_annotation and col_annotation \
            are True the row annotation is drawn on the main heatmap axes ('x'/'y') - confirm this is intended.
    """
    if df is None:
        return {}

    figure = get_heatmap(df, colorscale=colorscale, color_missing=False)

    # The helper returns None (no annotation requested), a single trace, or a
    # (row, column) tuple; only real traces may end up in the data list.
    module_annotation = get_module_color_annotation(list(df.index), row_annotation=row_annotation, col_annotation=col_annotation, bygene=False)
    if isinstance(module_annotation, tuple):
        figure['data'].extend(module_annotation)
    elif module_annotation is not None:
        figure['data'].append(module_annotation)

    annotations = []
    for n, row in enumerate(textmatrix.values):
        for m, val in enumerate(row):
            annotations.append(go.layout.Annotation(text=str(val), font=dict(size=8),
                                                    x=df.columns[m], y=df.index[n], xref='x', yref='y', showarrow=False))

    layout = go.Layout(width=width, height=height, title=title,
                    xaxis=dict(domain=[0.015, 1], autorange=True, showgrid=False, zeroline=False, showline=False, ticks='', showticklabels=True, automargin=True, anchor='y'),
                    yaxis=dict(autorange='reversed', ticklen=5, ticks='outside', tickcolor='white', showticklabels=False, automargin=True, showgrid=False, anchor='x'),
                    xaxis2=dict(domain=[0, 0.01], autorange=True, showgrid=False, zeroline=False, showline=False, ticks='', showticklabels=False, automargin=True, anchor='y2'),
                    yaxis2=dict(autorange='reversed', showgrid=False, zeroline=False, showline=False, ticks='', showticklabels=True, automargin=True, anchor='x2'))

    figure['layout'] = layout
    figure['layout']['template'] = 'plotly_white'
    figure['layout'].update(annotations=annotations)

    return figure


def plot_dendrogram_guidelines(Z_tree, dendrogram):
    """
    This function takes a dendrogram tree dictionary and its plotly object and creates shapes to be plotted as vertical dotted lines in the dendrogram.

    One line is created at every ``step``-th x-axis tick, starting at tick index ``step``, where
    ``step`` is an eighth of the number of ticks (at least 1, so dendrograms with fewer than
    eight leaves do not fail). Each line runs from y=0.3 up to the largest value in Z_tree['dcoord'].

    :param dict Z_tree: dictionary of data structures computed to render the dendrogram. Only key 'dcoord' is read here.
    :param dendrogram: dendrogram represented as a plotly object figure; ['layout']['xaxis']['tickvals'] is read.
    :return: List of dictionaries (plotly layout shapes). Empty if dendrogram is None or has no ticks.
    """
    if dendrogram is None:
        return []

    tickvals = list(dendrogram['layout']['xaxis']['tickvals'])
    # A step of 0 (fewer than 8 ticks) is not a valid slice step.
    step = max(1, len(tickvals) // 8)
    guide_positions = tickvals[step::step]
    if not guide_positions:
        return []

    top = np.max(Z_tree['dcoord'])
    # The same line-style dictionary is shared by all shapes.
    line = {'color': 'rgb(192,192,192)', 'width': 0.1, 'dash': 'dot'}

    return [{'type': 'line', 'x0': x, 'y0': 0.3, 'x1': x, 'y1': top, 'line': line} for x in guide_positions]


def plot_intramodular_correlation(MM, FS, feature_module_df, title, width=1000, height=800, max_modules=5):
    """
    This function uses the Feature significance and Module Membership measures, and plots a multi-scatter plot of all modules against all clinical traits.

    The figure is a grid with one row per column of FS and one column per module membership column of MM
    (columns whose name starts with 'MM'). Each panel shows, for the features assigned to that module,
    the absolute module membership (x) against the absolute feature significance (y), a linear
    regression line and its R and p-value.

    :param MM: pandas dataframe with module membership data (features as index, 'MM<color>' columns).
    :param FS: pandas dataframe with feature significance data (features as index, one column per trait).
    :param feature_module_df: pandas DataFrame of experimental features and module colors, with columns \
                            'name' and 'modColor' (use mode='dataframe' in get_FeaturesPerModule)
    :param str title: plot title
    :param int width: plot width
    :param int height: plot height
    :param int max_modules: maximum number of modules shown; the last max_modules columns of MM are used. \
                            Use None or 0 to show all modules.
    :return: Plotly object figure, or an empty dictionary if MM is None or there is nothing to plot.

    Example::

        plot = plot_intramodular_correlation(MM, FS, feature_module_df, title='Plot', width=1000, height=800)

    .. note:: There is a limit in the number of subplots one can make in Plotly. This function limits the number of modules shown to 5 by default.
    """
    if MM is None:
        return {}

    # Local import: accessing ``scipy.stats`` through the bare ``scipy`` package only works
    # if the submodule has already been imported somewhere else.
    from scipy.stats import linregress

    # Work on a copy so the caller's dataframe does not get an extra 'modColor' column.
    MM = (MM.iloc[:, -max_modules:] if max_modules else MM).copy()
    MM['modColor'] = MM.index.map(feature_module_df.set_index('name')['modColor'].get)

    module_cols = list(MM.columns[MM.columns.str.startswith('MM')])
    n_rows = len(FS.columns)
    n_cols = len(module_cols)
    if n_rows == 0 or n_cols == 0:
        return {}

    figure = tools.make_subplots(rows=n_rows, cols=n_cols, shared_xaxes=False, shared_yaxes=False, vertical_spacing=0.015, horizontal_spacing=0.1, print_grid=False)

    figure.layout.template = 'plotly_white'
    figure.layout.update(dict(width=width, height=height, showlegend=False, title=title))

    axis_dict = {}
    # Module names label the x axes of the bottom row of the grid.
    first_bottom_axis = (n_rows - 1) * n_cols + 1
    for col_idx, module_col in enumerate(module_cols):
        axis_dict['xaxis{}'.format(first_bottom_axis + col_idx)] = dict(title=dict(text=module_col, font=dict(size=13)))

    # Trait names label the y axes of the first column; only the first three words
    # of a trait name are used, one word per line.
    for row_idx, trait in enumerate(FS.columns):
        label = '<br>'.join(trait.split(' ')[:3])
        axis_dict['yaxis{}'.format(row_idx * n_cols + 1)] = dict(title=dict(text=label, font=dict(size=13)))

    annotations = []
    axis_id = 1
    for row_idx, trait in enumerate(FS.columns):
        for col_idx, module_col in enumerate(module_cols):
            color = module_col[2:]
            # Features of this module that are present in both tables, so that
            # x and y are paired by feature name rather than by position.
            members = MM.index[MM['modColor'] == color]
            members = members[members.isin(FS.index)]
            x = abs(MM.loc[members, module_col].values)
            y = abs(FS.loc[members, trait].values)

            figure.append_trace(go.Scattergl(x=x,
                                            y=y,
                                            text=members,
                                            mode='markers',
                                            opacity=0.7,
                                            marker={'size': 7,
                                                    'color': 'white',
                                                    'line': {'width': 1.5, 'color': color}}), row_idx+1, col_idx+1)

            # A regression needs at least two points.
            if len(x) > 1:
                slope, intercept, r_value, p_value, _ = linregress(x, y)
                figure.append_trace(go.Scattergl(x=x, y=slope*x+intercept, mode='lines', marker={'color': 'black'}), row_idx+1, col_idx+1)
                annotations.append(dict(x=0.7, y=0.7,
                                        xref='x{}'.format(axis_id), yref='y{}'.format(axis_id),
                                        text='R={:0.2}, p={:.0e}'.format(r_value, p_value),
                                        showarrow=False))
            axis_id += 1

    figure.layout.update(axis_dict)
    figure.layout.update(annotations=annotations)

    return figure

def _append_traces(figure, traces, row, col):
    """Append every trace in traces to the subplot at (row, col) of figure."""
    for trace in traces:
        figure.append_trace(trace, row, col)


def plot_complex_dendrogram(dendro_df, subplot_df, title, dendro_labels=[], distfun='euclidean', linkagefun='average', hang=0.04, subplot='module colors', subplot_colorscale=[], color_missingvals=True, row_annotation=False, col_annotation=False, width=1000, height=800):
    """
    This function plots a dendrogram with a subplot below that can be a heatmap (annotated or not) or module colors.

    :param dendro_df: pandas dataframe containing data used to generate dendrogram, columns will result in dendrogram leaves.
    :param subplot_df: data used to generate plot below dendrogram. With subplot='heatmap' a pandas dataframe; \
                        with subplot='module colors' it is passed on as the module color of each entry in dendro_labels.
    :param str title: the title of the figure.
    :param list dendro_labels: list of strings for dendrogram leaf nodes labels.
    :param str distfun: distance measure to be used ('euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'minkowski' or 'jaccard').
    :param str linkagefun: hierarchical/agglomeration method to be used ('single', 'complete', 'average', 'weighted', 'centroid', 'median' or 'ward').
    :param float hang: height at which the dendrogram leaves should be placed.
    :param str subplot: type of plot to be shown below the dendrogram ('module colors' or 'heatmap'). \
                        Any other value returns an empty dictionary.
    :param list subplot_colorscale: colorscale to be used in the heatmap subplot.
    :param bool color_missingvals: if set to `True`, plots missing values as grey in the heatmap.
    :param bool row_annotation: if `True`, adds a color-coded column at the left of the heatmap.
    :param bool col_annotation: if `True`, adds a color-coded row between the dendrogram and the heatmap \
                        (or at the bottom of the heatmap when row_annotation is also `True`). \
                        Ignored with subplot='module colors', where the color row is always drawn.
    :param int width: the width of the figure.
    :param int height: the height of the figure.
    :return: Plotly object figure, or an empty dictionary if no dendrogram could be computed.
    """
    figure = {}
    dendro_tree = wgcnaAnalysis.get_dendrogram(dendro_df, dendro_labels, distfun=distfun, linkagefun=linkagefun, div_clusters=False)
    if dendro_tree is None:
        return figure

    dendrogram = Dendrogram.plot_dendrogram(dendro_tree, hang=hang, cutoff_line=False)
    dendro_traces = list(dendrogram['data'])
    dendro_ticktext = dendrogram['layout']['xaxis']['ticktext']
    dendro_tickvals = dendrogram['layout']['xaxis']['tickvals']

    layout = go.Layout(width=width, height=height, showlegend=False, title=title,
                    xaxis=dict(domain=[0, 1], range=[np.min(dendro_tickvals)-6, np.max(dendro_tickvals)+4], showgrid=False,
                                zeroline=True, ticks='', automargin=True, anchor='y'),
                    yaxis=dict(domain=[0.7, 1], autorange=True, showgrid=False, zeroline=False, ticks='outside', title='Height', automargin=True, anchor='x'),
                    xaxis2=dict(domain=[0, 1], autorange=True, showgrid=True, zeroline=False, ticks='', showticklabels=False, automargin=True, anchor='y2'),
                    yaxis2=dict(domain=[0, 0.64], autorange=True, showgrid=False, zeroline=False, automargin=True, anchor='x2'))

    if subplot == 'module colors':
        figure = tools.make_subplots(rows=2, cols=1, print_grid=False)
        _append_traces(figure, dendro_traces, 1, 1)

        shapes = plot_dendrogram_guidelines(dendro_tree, dendrogram)
        # The color row is the whole point of this subplot, so it is always requested;
        # forwarding a False col_annotation would yield None instead of a trace.
        moduleColors = get_module_color_annotation(dendro_labels, col_annotation=True, bygene=True, module_colors=subplot_df, dendrogram=dendrogram)
        figure.append_trace(moduleColors, 2, 1)
        figure['layout'] = layout
        figure.layout.template = 'plotly_white'
        figure['layout'].update({'shapes': shapes,
                            'xaxis': dict(showticklabels=False),
                            'yaxis': dict(domain=[0.2, 1]),
                            'yaxis2': dict(domain=[0, 0.19], title='Module colors', ticks='', showticklabels=False)})

    elif subplot == 'heatmap':
        # Data entirely within [-1, 1] (bounds included) is plotted as is, ordered by the
        # dendrogram; anything else goes through get_percentiles_heatmap first.
        within_unit_range = all(subplot_df[column].between(-1, 1).all() for column in subplot_df.columns)
        if not within_unit_range:
            df = wgcnaAnalysis.get_percentiles_heatmap(subplot_df, dendro_tree, bydendro=True, bycols=False).T
        else:
            df = wgcnaAnalysis.df_sort_by_dendrogram(wgcnaAnalysis.df_sort_by_dendrogram(subplot_df, dendro_tree).T, dendro_tree)

        heatmap = get_heatmap(df, colorscale=subplot_colorscale, color_missing=color_missingvals)
        heatmap_traces = list(heatmap['data'])

        if row_annotation and col_annotation:
            figure = tools.make_subplots(rows=3, cols=2, specs=[[{'colspan': 2}, None],
                                                            [{}, {}],
                                                            [{'colspan': 2}, None]], print_grid=False)
            _append_traces(figure, dendro_traces, 1, 1)
            _append_traces(figure, heatmap_traces, 2, 2)

            r_annot, c_annot = get_module_color_annotation(list(df.index), row_annotation=row_annotation, col_annotation=col_annotation, bygene=False)
            figure.append_trace(r_annot, 2, 1)
            figure.append_trace(c_annot, 3, 1)

            figure['layout'] = layout
            figure.layout.template = 'plotly_white'
            figure['layout'].update({'xaxis': dict(ticks='', showticklabels=False, anchor='y'),
                                    'xaxis2': dict(domain=[0, 0.01], ticks='', showticklabels=False, automargin=True, anchor='y2'),
                                    'xaxis3': dict(domain=[0.015, 1], ticks='', showticklabels=False, automargin=True, anchor='y3'),
                                    'xaxis4': dict(domain=[0.015, 1], ticks='', showticklabels=True, automargin=True, anchor='y4'),
                                    'yaxis': dict(domain=[0.635, 1], automargin=True, anchor='x'),
                                    'yaxis2': dict(domain=[0.015, 0.635], autorange='reversed', ticks='', showticklabels=True, automargin=True, anchor='x2'),
                                    'yaxis3': dict(domain=[0.01, 0.635], autorange='reversed', ticks='', showticklabels=False, automargin=True, anchor='x3'),
                                    'yaxis4': dict(domain=[0, 0.01], ticks='', showticklabels=False, automargin=True, anchor='x4')})

        elif row_annotation:
            figure = tools.make_subplots(rows=2, cols=2, specs=[[{'colspan': 2}, None],
                                                            [{}, {}]], print_grid=False)
            _append_traces(figure, dendro_traces, 1, 1)
            _append_traces(figure, heatmap_traces, 2, 2)

            r_annot = get_module_color_annotation(list(df.index), row_annotation=row_annotation, col_annotation=col_annotation, bygene=False)
            figure.append_trace(r_annot, 2, 1)

            figure['layout'] = layout
            figure.layout.template = 'plotly_white'
            figure['layout'].update({'xaxis': dict(domain=[0.015, 1], ticktext=np.array(dendro_ticktext), tickvals=list(dendro_tickvals), automargin=True, anchor='y'),
                                    'xaxis2': dict(domain=[0, 0.010], ticks='', showticklabels=False, automargin=True, anchor='y2'),
                                    'xaxis3': dict(domain=[0.015, 1], ticks='', showticklabels=False, automargin=True, anchor='y3'),
                                    'yaxis': dict(automargin=True, anchor='x'),
                                    'yaxis2': dict(autorange='reversed', ticks='', showticklabels=True, automargin=True, anchor='x2'),
                                    'yaxis3': dict(domain=[0, 0.64], ticks='', showticklabels=False, automargin=True, anchor='x3')})

        elif col_annotation:
            figure = tools.make_subplots(rows=3, cols=1, specs=[[{}], [{}], [{}]], print_grid=False)
            _append_traces(figure, dendro_traces, 1, 1)
            _append_traces(figure, heatmap_traces, 3, 1)

            c_annot = get_module_color_annotation(list(df.index), row_annotation=row_annotation, col_annotation=col_annotation, bygene=False)
            figure.append_trace(c_annot, 2, 1)

            figure['layout'] = layout
            figure.layout.template = 'plotly_white'
            figure['layout'].update({'xaxis': dict(ticktext=np.array(dendro_ticktext), tickvals=list(dendro_tickvals), automargin=True, anchor='y'),
                                    'xaxis2': dict(ticks='', showticklabels=False, automargin=True, anchor='y2'),
                                    'xaxis3': dict(domain=[0, 1], ticks='', showticklabels=False, automargin=True, anchor='y3'),
                                    'yaxis': dict(domain=[0.70, 1], automargin=True, anchor='x'),
                                    'yaxis2': dict(domain=[0.615, 0.625], ticks='', showticklabels=False, automargin=True, anchor='x2'),
                                    'yaxis3': dict(domain=[0, 0.61], autorange='reversed', ticks='', showticklabels=False, automargin=True, anchor='x3')})

        else:
            figure = tools.make_subplots(rows=2, cols=1, print_grid=False)
            _append_traces(figure, dendro_traces, 1, 1)
            _append_traces(figure, heatmap_traces, 2, 1)

            figure['layout'] = layout
            figure.layout.template = 'plotly_white'
            figure.layout.update({'xaxis': dict(ticktext=np.array(dendro_ticktext), tickvals=list(dendro_tickvals)),
                            'yaxis2': dict(autorange='reversed')})

    return figure