Source code for forgi.visual.mplotlib

from __future__ import print_function
from __future__ import division
from builtins import zip
from past.builtins import basestring

import Bio
import forgi.graph.residue as fgr
import forgi.threedee.model.coarse_grain as ftmc
import forgi.threedee.utilities.pdb as ftup
import forgi.threedee.utilities.vector as ftuv
import math
import matplotlib.pyplot as plt
import numpy as np
import logging
import itertools
import colorsys

log = logging.getLogger(__name__)

[docs]def circles(x, y, s, c='b', ax=None, vmin=None, vmax=None,labels=[], **kwargs):
    """
    Make a scatter of circles plot of x vs y, where x and y are sequence
    like objects of the same lengths. The size of circles are in data scale.

    :param x,y: scalar or array_like, shape (n, )
                Input data
    :param s: scalar or array_like, shape (n, )
              Radius of circle in data scale (ie. in data unit)
    :param c: color or sequence of color, optional, default : 'b'
              `c` can be a single color format string, or a sequence of color
              specifications of length `N`, or a sequence of `N` numbers to be
              mapped to colors using the `cmap` and `norm` specified via kwargs.
              Note that `c` should not be a single numeric RGB or
              RGBA sequence because that is indistinguishable from an array of
              values to be colormapped.  `c` can be a 2-D array in which the
              rows are RGB or RGBA, however.
    :param ax: Axes object, optional, default: None
               Parent axes of the plot. It uses gca() if not specified.
    :param vmin, vmax: scalar, optional, default: None
        `vmin` and `vmax` are used in conjunction with `norm` to normalize
        luminance data.  If either are `None`, the min and max of the
        color array is used.  (Note if you pass a `norm` instance, your
        settings for `vmin` and `vmax` will be ignored.)
    :param kwargs: `~matplotlib.collections.Collection` properties
        eg. alpha, edgecolors, facecolors, linewidths, linestyles, norm, cmap

    :returns: paths : `~matplotlib.collections.PathCollection`


    Examples ::

        a = np.arange(11)
        circles(a, a, a*0.2, c=a, alpha=0.5, edgecolor='none')

    This code by ??? is under [The BSD 3-Clause License]
    (http://opensource.org/licenses/BSD-3-Clause)

    """
    from matplotlib.patches import Circle
    from matplotlib.collections import PatchCollection
    #import matplotlib.colors as colors

    if ax is None:
        ax = plt.gca()

    if isinstance(c, basestring):
        color = c     # ie. use colors.colorConverter.to_rgba_array(c)
    else:
        color = None  # use cmap, norm after collection is created
    kwargs.update(color=color)

    if np.isscalar(x):
        patches = [Circle((x, y), s), ]
    elif np.isscalar(s):
        patches = [Circle((x_, y_), s) for x_, y_ in zip(x, y)]
    else:
        patches = [Circle((x_, y_), s_) for x_, y_, s_ in zip(x, y, s)]

    collection = PatchCollection(patches, **kwargs)

    if color is None:
        collection.set_array(np.asarray(c))
        if vmin is not None or vmax is not None:
            collection.set_clim(vmin, vmax)

    ax.add_collection(collection)
    ax.autoscale_view()
    return collection

def _clashfree_annot_pos(pos, coords):
    for c in coords:
        dist = ftuv.vec_distance(c, pos)
        #log.debug("vec_dist=%s", dist)
        if dist<14:
            return False
    return True

def _annotate_rna_plot(ax, cg, coords, annotations, text_kwargs):
    # Plot annotations
    annot_dict = { elem:elem for elem in cg.defines}
    if annotations is None:
        annot_dict={elem:"" for elem in cg.defines}
    else:
        annot_dict.update(annotations)
    stem_coords = {}
    for stem in cg.stem_iterator():
        stem_start =  np.mean([coords[cg.defines[stem][0]-1],
                               coords[cg.defines[stem][3]-1]],
                               axis=0)
        stem_end =  np.mean([coords[cg.defines[stem][1]-1],
                               coords[cg.defines[stem][2]-1]],
                               axis=0)
        stem_center = np.mean([stem_start, stem_end], axis=0)
        stem_coords[stem]=(stem_start, stem_center, stem_end)
        if annot_dict[stem]:
            stem_vec = stem_end - stem_start
            norm_vec = (stem_vec[1], -stem_vec[0])
            norm_vec/=ftuv.magnitude(norm_vec)
            annot_pos = np.array(stem_center)+23*norm_vec
            #log.debug("Checking clashfree for %s, %s", stem, annot_pos)
            if not _clashfree_annot_pos(annot_pos, coords):
                log.debug("Cannot annotate %s as %s ON THE RIGHT HAND SIDE, because of insufficient space. Trying left side...", stem, annot_dict[stem])
                annot_pos = np.array(stem_center)-23*norm_vec
                #log.debug("Checking clashfree OTHER SIDE for %s, %s", stem, annot_pos)
                if not _clashfree_annot_pos(annot_pos, coords):
                    log.info("Cannot annotate %s as '%s', because of insufficient space.", stem, annot_dict[stem])
                    annot_pos = None
            #log.debug("%s", annot_pos)
            if annot_pos is not None:
                ax.annotate(annot_dict[stem], xy=annot_pos,
                            ha="center", va="center", **text_kwargs )
    for hloop in cg.hloop_iterator():
        hc = []
        for nt in cg.define_residue_num_iterator(hloop, adjacent=True):
            hc.append(coords[nt-1])
        annot_pos = np.mean(hc, axis=0)
        if _clashfree_annot_pos(annot_pos, coords):
            ax.annotate(annot_dict[hloop], xy=annot_pos,
                        ha="center", va="center", **text_kwargs )
        else:
            log.info("Cannot annotate %s as '%s' ON THE INSIDE, because of insufficient space. Trying outside...", hloop, annot_dict[hloop])
            nt1, nt2 = cg.define_a(hloop)
            start = np.mean([coords[nt1-1], coords[nt2-1]], axis=0)
            vec = annot_pos-start
            annot_pos = annot_pos+vec*3
            if _clashfree_annot_pos(annot_pos, coords):
                ax.annotate(annot_dict[hloop], xy=annot_pos,
                            ha="center", va="center", **text_kwargs )
            else:
                log.info("Cannot annotate %s as '%s', because of insufficient space.", hloop, annot_dict[hloop])
    for iloop in cg.iloop_iterator():
        s1, s2 = cg.connections(iloop)
        annot_pos = np.mean([ stem_coords[s1][2], stem_coords[s2][0]], axis=0)
        if _clashfree_annot_pos(annot_pos, coords):
            ax.annotate(annot_dict[iloop], xy=annot_pos,
                        ha="center", va="center", **text_kwargs )
        else:
            log.debug("Cannot annotate %s as '%s' ON THE INSIDE, because of insufficient space. Trying outside...", iloop, annot_dict[iloop])
            loop_vec = stem_coords[s2][0] - stem_coords[s1][2]
            norm_vec = (loop_vec[1], -loop_vec[0])
            norm_vec/=ftuv.magnitude(norm_vec)
            annot_pos_p = np.array(annot_pos)+25*norm_vec
            annot_pos_m = np.array(annot_pos)-25*norm_vec
            # iloops can be asymmetric (more nts on one strand.)
            # plot the label on the strand with more nts.
            plus=0
            minus=0
            for nt in cg.define_residue_num_iterator(iloop):
                if ftuv.vec_distance(annot_pos_p, coords[nt-1])<ftuv.vec_distance(annot_pos_m, coords[nt-1]):
                    plus+=1
                else:
                    minus+=1
            if plus>minus:
                if _clashfree_annot_pos(annot_pos_p, coords):
                    ax.annotate(annot_dict[iloop], xy=annot_pos_p,
                                ha="center", va="center", **text_kwargs )
                else:
                    log.info("Cannot annotate %s as '%s' (only trying inside and right side), because of insufficient space.", iloop, annot_dict[iloop])

            else:
                if _clashfree_annot_pos(annot_pos_m, coords):
                    ax.annotate(annot_dict[iloop], xy=annot_pos_m,
                                ha="center", va="center", **text_kwargs )
                else:
                    log.info("Cannot annotate %s as '%s' (only trying inside and left side), because of insufficient space.", iloop, annot_dict[iloop])
    for mloop in itertools.chain(cg.floop_iterator(), cg.tloop_iterator(), cg.mloop_iterator()):
        nt1, nt2 = cg.define_a(mloop)
        res = list(cg.define_residue_num_iterator(mloop))
        if len(res)==0:
            anchor = np.mean([coords[nt1-1], coords[nt2-1]], axis=0)
        elif len(res)%2==1:
            anchor = coords[res[int(len(res)//2)]-1]
        else:
            anchor =  np.mean([ coords[res[int(len(res)//2)-1]-1],
                                coords[res[int(len(res)//2)]-1] ],
                              axis=0)
        loop_vec = coords[nt1-1] - coords[nt2-1]
        norm_vec = (loop_vec[1], -loop_vec[0])
        norm_vec/=ftuv.magnitude(norm_vec)
        annot_pos = anchor - norm_vec*18
        if _clashfree_annot_pos(annot_pos, coords):
            ax.annotate(annot_dict[mloop], xy=annot_pos,
                        ha="center", va="center", **text_kwargs )
        else:
            log.info("Cannot annotate %s as '%s' , because of insufficient space.", mloop, annot_dict[mloop])

[docs]def plot_rna(cg, ax=None, offset=(0, 0), text_kwargs={}, backbone_kwargs={},
             basepair_kwargs={}, color=True, lighten=0, annotations={}):
    '''
    Plot an RNA structure given a set of nucleotide coordinates

    .. note::

        This function calls set_axis_off on the axis. You can revert this by
        using ax.set_axis_on() if you like to see the axis.

    :param cg: A forgi.threedee.model.coarse_grain.CoarseGrainRNA structure
    :param ax: A matplotlib plotting area
    :param offset: Offset the plot by these coordinates. If a simple True is passed in, then
                   offset by the current width of the plot
    :param text_kwargs: keyword arguments passed to matplotlib.pyplot.annotate
                        for plotting of the sequence
    :param backbone_kwargs: keyword arguments passed to matplotlib.pyplot.plot
                        for plotting of the backbone links
    :param basepair_kwargs: keyword arguments passed to matplotlib.pyplot.plot
                        for plotting of the basepair links
    :param lighten: Make circles lighter. A percent value where 1 makes
                    everything white and 0 leaves the colors unchanged
    :param annotations: A dictionary {elem_name: string} or None.
                        By default, the element names (e.g. "s0") are plotted
                        next to the element. This dictionary can be used to
                        override the default element names by costum strings.
                        To remove individual annotations, assign an empty string to the key.
                        To remove all annotations, set this to None.

                        .. warning::

                            Annotations are not shown, if there is not enough space.
                            Annotations not shown are logged with level INFO
    :return: (ax, coords) The axes and the coordinates for each nucleotide
    '''
    log.info("Starting to plot RNA...")
    import RNA
    import matplotlib.colors as mc
    RNA.cvar.rna_plot_type = 1

    coords = []
    #colors = []
    #circles = []

    bp_string = cg.to_dotbracket_string()
    # get the type of element of each nucleotide
    el_string = cg.to_element_string()
    # i.e. eeesssshhhhsssseeee
    el_to_color = {'f': 'orange',
                   't': 'orange',
                   's': 'green',
                   'h': 'blue',
                   'i': 'yellow',
                   'm': 'red'}

    if ax is None:
        ax = plt.gca()

    if offset is None:
        offset = (0, 0)
    elif offset is True:
        offset = (ax.get_xlim()[1], ax.get_ylim()[1])
    else:
        pass

    vrna_coords = RNA.get_xy_coordinates(bp_string)
    # TODO Add option to rotate the plot
    for i, _ in enumerate(bp_string):
        coord = (offset[0] + vrna_coords.get(i).X,
                 offset[1] + vrna_coords.get(i).Y)
        coords.append(coord)
    coords = np.array(coords)
    # First plot backbone
    bkwargs = {"color":"black", "zorder":0}
    bkwargs.update(backbone_kwargs)
    ax.plot(coords[:,0], coords[:,1], **bkwargs)
    # Now plot basepairs
    basepairs = []
    for s in cg.stem_iterator():
        for p1, p2 in cg.stem_bp_iterator(s):
            basepairs.append([coords[p1-1], coords[p2-1]])
    if basepairs:
        basepairs = np.array(basepairs)
        if color:
            c = "red"
        else:
            c = "black"
            bpkwargs = {"color":c, "zorder":0, "linewidth":3}
            bpkwargs.update(basepair_kwargs)
            ax.plot(basepairs[:,:,0].T, basepairs[:,:,1].T, **bpkwargs)
    # Now plot circles
    for i, coord in enumerate(coords):
        if color:
            c = el_to_color[el_string[i]]
            h,l,s = colorsys.rgb_to_hls(*mc.to_rgb(c))
            if lighten>0:
                l += (1-l)*min(1,lighten)
            else:
                l +=l*max(-1, lighten)
            if l>1 or l<0:
                print(l)
            c=colorsys.hls_to_rgb(h,l,s)
            circle = plt.Circle((coord[0], coord[1]),
                            color=c)
        else:
            circle = plt.Circle((coord[0], coord[1]),
                                edgecolor="black", facecolor="white")

        ax.add_artist(circle)
        if cg.seq:
            if "fontweight" not in text_kwargs:
                text_kwargs["fontweight"]="bold"
            ax.annotate(cg.seq[i+1],xy=coord, ha="center", va="center", **text_kwargs )

    all_coords=list(coords)
    ntnum_kwargs = {"color":"gray"}
    ntnum_kwargs.update(text_kwargs)
    for nt in range(10, cg.seq_length, 10):
        # We try different angles
        annot_pos = _find_annot_pos_on_circle(nt, all_coords, cg)
        if annot_pos is not None:
            ax.annotate(str(nt), xy=coords[nt-1], xytext=annot_pos,
                        arrowprops={"width":1, "headwidth":1, "color":"gray"},
                        ha="center", va="center", zorder=0, **ntnum_kwargs)
            all_coords.append(annot_pos)

    _annotate_rna_plot(ax, cg, all_coords, annotations, text_kwargs)
    datalim = ((min(list(coords[:, 0]) + [ax.get_xlim()[0]]),
                min(list(coords[:, 1]) + [ax.get_ylim()[0]])),
               (max(list(coords[:, 0]) + [ax.get_xlim()[1]]),
                max(list(coords[:, 1]) + [ax.get_ylim()[1]])))

    '''
    min_coord = min(datalim[0][0], datalim[0][1])
    max_coord = max(datalim[1][0], datalim[1][1])
    datalim = ((min_coord, min_coord), (max_coord, max_coord))

    print "min_coord:", min_coord
    print "max_coord:", max_coord
    print "datalime:", datalim
    '''

    width = datalim[1][0] - datalim[0][0]
    height = datalim[1][1] - datalim[0][1]

    #ax.set_aspect(width / height)
    ax.set_aspect('equal', 'datalim')
    ax.update_datalim(datalim)
    ax.autoscale_view()
    ax.set_axis_off()

    return (ax, coords)

def _find_annot_pos_on_circle(nt, coords, cg):
    for i in range(5):
        for sign in [-1,1]:
            a = np.pi/4*i*sign
            if cg.get_elem(nt)[0]=="s":
                bp = cg.pairing_partner(nt)
                anchor = coords[bp-1]
            else:
                anchor =np.mean([ coords[nt-2], coords[nt]], axis=0)
            vec = coords[nt-1]-anchor
            vec=vec/ftuv.magnitude(vec)
            rotated_vec =  np.array([vec[0]*math.cos(a)-vec[1]*math.sin(a),
                                     vec[0]*math.sin(a)+vec[1]*math.cos(a)])
            annot_pos = coords[nt-1]+rotated_vec*18
            if _clashfree_annot_pos(annot_pos, coords):
                log.debug("Annot pos on c is %s",annot_pos)
                return annot_pos
    return None

[docs]def plot_pdb(filename, ax=None):
    """
    Plot the secondary structure of an RNA in a PDB file using the
    Graph Layout from the ViennaRNA package and indicate long-range
    interations and RNA-protein interactions.

    Interchain RNA-RNA interactions are shown as red lines.
    Proteins are shown as transparent gray circles with lines
    indicating the interacting residues. The circle radius corresponds
    to the number of interacting nucleotides.


    :param structure: A Bio.PDB.Structure
    :param ax: Optional. An matplotlib axis object
    :return: An Axes object (ax)
    """

    structure = Bio.PDB.PDBParser().get_structure('blah', filename)
    model = list(structure)[0]
    chain_coords = {}
    cgs = {}

    import collections as col

    # store a list of RNA nucleotides that each protein interacts with
    protein_interactions = col.defaultdict(set)
    protein_circles = []

    for chain in model:
        # iterate over RNAs
        if ftup.contains_rna(chain):
            # convert to cg and store so that we can convert pdb nucleotide ids to
            # secondary structure indexes later
            cg, = ftmc.CoarseGrainRNA.from_pdb(filename, load_chains=chain.id)
            cgs[chain.id] = cg

            # plot the structure and store the coordinates
            (ax, coords) = plot_rna(cg, offset=True, ax=ax)
            chain_coords[chain.id] = coords

    for (a1, a2) in ftup.interchain_contacts(structure):
        # iterate over all the interactions in order to find out which
        # nucleotides this protein interacts with
        chain1 = a1.parent.parent
        chain2 = a2.parent.parent

        if ftup.is_protein(chain1) and ftup.contains_rna(chain2):
            # collect all the RNA nucleotides that a protein interacts with
            sid = cgs[chain2.id].seq.to_integer(fgr.RESID(chain2.id, a2.parent.id))-1
            protein_interactions[chain1.id].add((chain2.id, sid))

        if ftup.contains_rna(chain1) and ftup.contains_rna(chain2):
            try:
                sid1 = cgs[chain1.id].seq.to_integer(fgr.RESID(chain1.id, a1.parent.id))-1
                sid2 = cgs[chain2.id].seq.to_integer(fgr.RESID(chain2.id, a2.parent.id))-1
            except ValueError:
                continue

            coord1 = chain_coords[chain1.id][sid1]
            coord2 = chain_coords[chain2.id][sid2]

            ax.plot([coord1[0], coord2[0]], [coord1[1], coord2[1]],
                    'k-', alpha=0.5)

    for chain in model:
        # draw each protein and the links that it has to other nucleotides
        if ftup.is_protein(chain):
            # the protein will be positioned at the centroid of the nucleotides
            # that it interacts with
            interacting_coords = [np.array(chain_coords[chain_id][nuc_num])
                                  for (chain_id, nuc_num) in protein_interactions[chain.id]]

            centroid = np.sum(interacting_coords, axis=0) / \
                len(interacting_coords)

            # the size of the circle representing it will be proportional to its
            # length (in nucleotides)
            radius = 2 * math.sqrt(len(chain.get_list()))
            protein_circles += [[centroid[0], centroid[1], radius]]

            # draw all of the interactions as lines
            for coord in interacting_coords:
                ax.plot([coord[0], centroid[0]], [
                        coord[1], centroid[1]], 'k-', alpha=0.5)

    protein_circles = np.array(protein_circles)
    if len(protein_circles) > 0:
        circles(protein_circles[:, 0], protein_circles[:, 1],
                protein_circles[:, 2], 'grey', alpha=0.5)

    # plt.axis('off')

    return ax
forgi 2.0.0 documentation

Source code for forgi.visual.mplotlib