forgi 2.0.0 documentation

Contents

Source code for forgi.visual.mplotlib

from __future__ import print_function
from __future__ import division
from builtins import zip
from past.builtins import basestring

import Bio
import forgi.graph.residue as fgr
import forgi.threedee.model.coarse_grain as ftmc
import forgi.threedee.utilities.pdb as ftup
import forgi.threedee.utilities.vector as ftuv
import math
import matplotlib.pyplot as plt
import numpy as np
import logging
import itertools
import colorsys

log = logging.getLogger(__name__)

[docs]def circles(x, y, s, c='b', ax=None, vmin=None, vmax=None,labels=[], **kwargs): """ Make a scatter of circles plot of x vs y, where x and y are sequence like objects of the same lengths. The size of circles are in data scale. :param x,y: scalar or array_like, shape (n, ) Input data :param s: scalar or array_like, shape (n, ) Radius of circle in data scale (ie. in data unit) :param c: color or sequence of color, optional, default : 'b' `c` can be a single color format string, or a sequence of color specifications of length `N`, or a sequence of `N` numbers to be mapped to colors using the `cmap` and `norm` specified via kwargs. Note that `c` should not be a single numeric RGB or RGBA sequence because that is indistinguishable from an array of values to be colormapped. `c` can be a 2-D array in which the rows are RGB or RGBA, however. :param ax: Axes object, optional, default: None Parent axes of the plot. It uses gca() if not specified. :param vmin, vmax: scalar, optional, default: None `vmin` and `vmax` are used in conjunction with `norm` to normalize luminance data. If either are `None`, the min and max of the color array is used. (Note if you pass a `norm` instance, your settings for `vmin` and `vmax` will be ignored.) :param kwargs: `~matplotlib.collections.Collection` properties eg. alpha, edgecolors, facecolors, linewidths, linestyles, norm, cmap :returns: paths : `~matplotlib.collections.PathCollection` Examples :: a = np.arange(11) circles(a, a, a*0.2, c=a, alpha=0.5, edgecolor='none') This code by ??? is under [The BSD 3-Clause License] (http://opensource.org/licenses/BSD-3-Clause) """ from matplotlib.patches import Circle from matplotlib.collections import PatchCollection #import matplotlib.colors as colors if ax is None: ax = plt.gca() if isinstance(c, basestring): color = c # ie. use colors.colorConverter.to_rgba_array(c) else: color = None # use cmap, norm after collection is created kwargs.update(color=color) if np.isscalar(x): patches = [Circle((x, y), s), ] elif np.isscalar(s): patches = [Circle((x_, y_), s) for x_, y_ in zip(x, y)] else: patches = [Circle((x_, y_), s_) for x_, y_, s_ in zip(x, y, s)] collection = PatchCollection(patches, **kwargs) if color is None: collection.set_array(np.asarray(c)) if vmin is not None or vmax is not None: collection.set_clim(vmin, vmax) ax.add_collection(collection) ax.autoscale_view() return collection
def _clashfree_annot_pos(pos, coords): for c in coords: dist = ftuv.vec_distance(c, pos) #log.debug("vec_dist=%s", dist) if dist<14: return False return True def _annotate_rna_plot(ax, cg, coords, annotations, text_kwargs): # Plot annotations annot_dict = { elem:elem for elem in cg.defines} if annotations is None: annot_dict={elem:"" for elem in cg.defines} else: annot_dict.update(annotations) stem_coords = {} for stem in cg.stem_iterator(): stem_start = np.mean([coords[cg.defines[stem][0]-1], coords[cg.defines[stem][3]-1]], axis=0) stem_end = np.mean([coords[cg.defines[stem][1]-1], coords[cg.defines[stem][2]-1]], axis=0) stem_center = np.mean([stem_start, stem_end], axis=0) stem_coords[stem]=(stem_start, stem_center, stem_end) if annot_dict[stem]: stem_vec = stem_end - stem_start norm_vec = (stem_vec[1], -stem_vec[0]) norm_vec/=ftuv.magnitude(norm_vec) annot_pos = np.array(stem_center)+23*norm_vec #log.debug("Checking clashfree for %s, %s", stem, annot_pos) if not _clashfree_annot_pos(annot_pos, coords): log.debug("Cannot annotate %s as %s ON THE RIGHT HAND SIDE, because of insufficient space. Trying left side...", stem, annot_dict[stem]) annot_pos = np.array(stem_center)-23*norm_vec #log.debug("Checking clashfree OTHER SIDE for %s, %s", stem, annot_pos) if not _clashfree_annot_pos(annot_pos, coords): log.info("Cannot annotate %s as '%s', because of insufficient space.", stem, annot_dict[stem]) annot_pos = None #log.debug("%s", annot_pos) if annot_pos is not None: ax.annotate(annot_dict[stem], xy=annot_pos, ha="center", va="center", **text_kwargs ) for hloop in cg.hloop_iterator(): hc = [] for nt in cg.define_residue_num_iterator(hloop, adjacent=True): hc.append(coords[nt-1]) annot_pos = np.mean(hc, axis=0) if _clashfree_annot_pos(annot_pos, coords): ax.annotate(annot_dict[hloop], xy=annot_pos, ha="center", va="center", **text_kwargs ) else: log.info("Cannot annotate %s as '%s' ON THE INSIDE, because of insufficient space. Trying outside...", hloop, annot_dict[hloop]) nt1, nt2 = cg.define_a(hloop) start = np.mean([coords[nt1-1], coords[nt2-1]], axis=0) vec = annot_pos-start annot_pos = annot_pos+vec*3 if _clashfree_annot_pos(annot_pos, coords): ax.annotate(annot_dict[hloop], xy=annot_pos, ha="center", va="center", **text_kwargs ) else: log.info("Cannot annotate %s as '%s', because of insufficient space.", hloop, annot_dict[hloop]) for iloop in cg.iloop_iterator(): s1, s2 = cg.connections(iloop) annot_pos = np.mean([ stem_coords[s1][2], stem_coords[s2][0]], axis=0) if _clashfree_annot_pos(annot_pos, coords): ax.annotate(annot_dict[iloop], xy=annot_pos, ha="center", va="center", **text_kwargs ) else: log.debug("Cannot annotate %s as '%s' ON THE INSIDE, because of insufficient space. Trying outside...", iloop, annot_dict[iloop]) loop_vec = stem_coords[s2][0] - stem_coords[s1][2] norm_vec = (loop_vec[1], -loop_vec[0]) norm_vec/=ftuv.magnitude(norm_vec) annot_pos_p = np.array(annot_pos)+25*norm_vec annot_pos_m = np.array(annot_pos)-25*norm_vec # iloops can be asymmetric (more nts on one strand.) # plot the label on the strand with more nts. plus=0 minus=0 for nt in cg.define_residue_num_iterator(iloop): if ftuv.vec_distance(annot_pos_p, coords[nt-1])<ftuv.vec_distance(annot_pos_m, coords[nt-1]): plus+=1 else: minus+=1 if plus>minus: if _clashfree_annot_pos(annot_pos_p, coords): ax.annotate(annot_dict[iloop], xy=annot_pos_p, ha="center", va="center", **text_kwargs ) else: log.info("Cannot annotate %s as '%s' (only trying inside and right side), because of insufficient space.", iloop, annot_dict[iloop]) else: if _clashfree_annot_pos(annot_pos_m, coords): ax.annotate(annot_dict[iloop], xy=annot_pos_m, ha="center", va="center", **text_kwargs ) else: log.info("Cannot annotate %s as '%s' (only trying inside and left side), because of insufficient space.", iloop, annot_dict[iloop]) for mloop in itertools.chain(cg.floop_iterator(), cg.tloop_iterator(), cg.mloop_iterator()): nt1, nt2 = cg.define_a(mloop) res = list(cg.define_residue_num_iterator(mloop)) if len(res)==0: anchor = np.mean([coords[nt1-1], coords[nt2-1]], axis=0) elif len(res)%2==1: anchor = coords[res[int(len(res)//2)]-1] else: anchor = np.mean([ coords[res[int(len(res)//2)-1]-1], coords[res[int(len(res)//2)]-1] ], axis=0) loop_vec = coords[nt1-1] - coords[nt2-1] norm_vec = (loop_vec[1], -loop_vec[0]) norm_vec/=ftuv.magnitude(norm_vec) annot_pos = anchor - norm_vec*18 if _clashfree_annot_pos(annot_pos, coords): ax.annotate(annot_dict[mloop], xy=annot_pos, ha="center", va="center", **text_kwargs ) else: log.info("Cannot annotate %s as '%s' , because of insufficient space.", mloop, annot_dict[mloop])
[docs]def plot_rna(cg, ax=None, offset=(0, 0), text_kwargs={}, backbone_kwargs={}, basepair_kwargs={}, color=True, lighten=0, annotations={}): ''' Plot an RNA structure given a set of nucleotide coordinates .. note:: This function calls set_axis_off on the axis. You can revert this by using ax.set_axis_on() if you like to see the axis. :param cg: A forgi.threedee.model.coarse_grain.CoarseGrainRNA structure :param ax: A matplotlib plotting area :param offset: Offset the plot by these coordinates. If a simple True is passed in, then offset by the current width of the plot :param text_kwargs: keyword arguments passed to matplotlib.pyplot.annotate for plotting of the sequence :param backbone_kwargs: keyword arguments passed to matplotlib.pyplot.plot for plotting of the backbone links :param basepair_kwargs: keyword arguments passed to matplotlib.pyplot.plot for plotting of the basepair links :param lighten: Make circles lighter. A percent value where 1 makes everything white and 0 leaves the colors unchanged :param annotations: A dictionary {elem_name: string} or None. By default, the element names (e.g. "s0") are plotted next to the element. This dictionary can be used to override the default element names by costum strings. To remove individual annotations, assign an empty string to the key. To remove all annotations, set this to None. .. warning:: Annotations are not shown, if there is not enough space. Annotations not shown are logged with level INFO :return: (ax, coords) The axes and the coordinates for each nucleotide ''' log.info("Starting to plot RNA...") import RNA import matplotlib.colors as mc RNA.cvar.rna_plot_type = 1 coords = [] #colors = [] #circles = [] bp_string = cg.to_dotbracket_string() # get the type of element of each nucleotide el_string = cg.to_element_string() # i.e. eeesssshhhhsssseeee el_to_color = {'f': 'orange', 't': 'orange', 's': 'green', 'h': 'blue', 'i': 'yellow', 'm': 'red'} if ax is None: ax = plt.gca() if offset is None: offset = (0, 0) elif offset is True: offset = (ax.get_xlim()[1], ax.get_ylim()[1]) else: pass vrna_coords = RNA.get_xy_coordinates(bp_string) # TODO Add option to rotate the plot for i, _ in enumerate(bp_string): coord = (offset[0] + vrna_coords.get(i).X, offset[1] + vrna_coords.get(i).Y) coords.append(coord) coords = np.array(coords) # First plot backbone bkwargs = {"color":"black", "zorder":0} bkwargs.update(backbone_kwargs) ax.plot(coords[:,0], coords[:,1], **bkwargs) # Now plot basepairs basepairs = [] for s in cg.stem_iterator(): for p1, p2 in cg.stem_bp_iterator(s): basepairs.append([coords[p1-1], coords[p2-1]]) if basepairs: basepairs = np.array(basepairs) if color: c = "red" else: c = "black" bpkwargs = {"color":c, "zorder":0, "linewidth":3} bpkwargs.update(basepair_kwargs) ax.plot(basepairs[:,:,0].T, basepairs[:,:,1].T, **bpkwargs) # Now plot circles for i, coord in enumerate(coords): if color: c = el_to_color[el_string[i]] h,l,s = colorsys.rgb_to_hls(*mc.to_rgb(c)) if lighten>0: l += (1-l)*min(1,lighten) else: l +=l*max(-1, lighten) if l>1 or l<0: print(l) c=colorsys.hls_to_rgb(h,l,s) circle = plt.Circle((coord[0], coord[1]), color=c) else: circle = plt.Circle((coord[0], coord[1]), edgecolor="black", facecolor="white") ax.add_artist(circle) if cg.seq: if "fontweight" not in text_kwargs: text_kwargs["fontweight"]="bold" ax.annotate(cg.seq[i+1],xy=coord, ha="center", va="center", **text_kwargs ) all_coords=list(coords) ntnum_kwargs = {"color":"gray"} ntnum_kwargs.update(text_kwargs) for nt in range(10, cg.seq_length, 10): # We try different angles annot_pos = _find_annot_pos_on_circle(nt, all_coords, cg) if annot_pos is not None: ax.annotate(str(nt), xy=coords[nt-1], xytext=annot_pos, arrowprops={"width":1, "headwidth":1, "color":"gray"}, ha="center", va="center", zorder=0, **ntnum_kwargs) all_coords.append(annot_pos) _annotate_rna_plot(ax, cg, all_coords, annotations, text_kwargs) datalim = ((min(list(coords[:, 0]) + [ax.get_xlim()[0]]), min(list(coords[:, 1]) + [ax.get_ylim()[0]])), (max(list(coords[:, 0]) + [ax.get_xlim()[1]]), max(list(coords[:, 1]) + [ax.get_ylim()[1]]))) ''' min_coord = min(datalim[0][0], datalim[0][1]) max_coord = max(datalim[1][0], datalim[1][1]) datalim = ((min_coord, min_coord), (max_coord, max_coord)) print "min_coord:", min_coord print "max_coord:", max_coord print "datalime:", datalim ''' width = datalim[1][0] - datalim[0][0] height = datalim[1][1] - datalim[0][1] #ax.set_aspect(width / height) ax.set_aspect('equal', 'datalim') ax.update_datalim(datalim) ax.autoscale_view() ax.set_axis_off() return (ax, coords)
def _find_annot_pos_on_circle(nt, coords, cg): for i in range(5): for sign in [-1,1]: a = np.pi/4*i*sign if cg.get_elem(nt)[0]=="s": bp = cg.pairing_partner(nt) anchor = coords[bp-1] else: anchor =np.mean([ coords[nt-2], coords[nt]], axis=0) vec = coords[nt-1]-anchor vec=vec/ftuv.magnitude(vec) rotated_vec = np.array([vec[0]*math.cos(a)-vec[1]*math.sin(a), vec[0]*math.sin(a)+vec[1]*math.cos(a)]) annot_pos = coords[nt-1]+rotated_vec*18 if _clashfree_annot_pos(annot_pos, coords): log.debug("Annot pos on c is %s",annot_pos) return annot_pos return None
[docs]def plot_pdb(filename, ax=None): """ Plot the secondary structure of an RNA in a PDB file using the Graph Layout from the ViennaRNA package and indicate long-range interations and RNA-protein interactions. Interchain RNA-RNA interactions are shown as red lines. Proteins are shown as transparent gray circles with lines indicating the interacting residues. The circle radius corresponds to the number of interacting nucleotides. :param structure: A Bio.PDB.Structure :param ax: Optional. An matplotlib axis object :return: An Axes object (ax) """ structure = Bio.PDB.PDBParser().get_structure('blah', filename) model = list(structure)[0] chain_coords = {} cgs = {} import collections as col # store a list of RNA nucleotides that each protein interacts with protein_interactions = col.defaultdict(set) protein_circles = [] for chain in model: # iterate over RNAs if ftup.contains_rna(chain): # convert to cg and store so that we can convert pdb nucleotide ids to # secondary structure indexes later cg, = ftmc.CoarseGrainRNA.from_pdb(filename, load_chains=chain.id) cgs[chain.id] = cg # plot the structure and store the coordinates (ax, coords) = plot_rna(cg, offset=True, ax=ax) chain_coords[chain.id] = coords for (a1, a2) in ftup.interchain_contacts(structure): # iterate over all the interactions in order to find out which # nucleotides this protein interacts with chain1 = a1.parent.parent chain2 = a2.parent.parent if ftup.is_protein(chain1) and ftup.contains_rna(chain2): # collect all the RNA nucleotides that a protein interacts with sid = cgs[chain2.id].seq.to_integer(fgr.RESID(chain2.id, a2.parent.id))-1 protein_interactions[chain1.id].add((chain2.id, sid)) if ftup.contains_rna(chain1) and ftup.contains_rna(chain2): try: sid1 = cgs[chain1.id].seq.to_integer(fgr.RESID(chain1.id, a1.parent.id))-1 sid2 = cgs[chain2.id].seq.to_integer(fgr.RESID(chain2.id, a2.parent.id))-1 except ValueError: continue coord1 = chain_coords[chain1.id][sid1] coord2 = chain_coords[chain2.id][sid2] ax.plot([coord1[0], coord2[0]], [coord1[1], coord2[1]], 'k-', alpha=0.5) for chain in model: # draw each protein and the links that it has to other nucleotides if ftup.is_protein(chain): # the protein will be positioned at the centroid of the nucleotides # that it interacts with interacting_coords = [np.array(chain_coords[chain_id][nuc_num]) for (chain_id, nuc_num) in protein_interactions[chain.id]] centroid = np.sum(interacting_coords, axis=0) / \ len(interacting_coords) # the size of the circle representing it will be proportional to its # length (in nucleotides) radius = 2 * math.sqrt(len(chain.get_list())) protein_circles += [[centroid[0], centroid[1], radius]] # draw all of the interactions as lines for coord in interacting_coords: ax.plot([coord[0], centroid[0]], [ coord[1], centroid[1]], 'k-', alpha=0.5) protein_circles = np.array(protein_circles) if len(protein_circles) > 0: circles(protein_circles[:, 0], protein_circles[:, 1], protein_circles[:, 2], 'grey', alpha=0.5) # plt.axis('off') return ax

Contents