forgi 2.0.0 documentation

Contents

Source code for forgi.threedee.utilities.graph_pdb

#!/usr/bin/python
from __future__ import print_function
from __future__ import division
from builtins import map
from builtins import range
from builtins import object
from builtins import zip, str

import itertools as it
import collections as col
import os.path as op
import warnings
import random
import sys
import math
import json
import operator
from pprint import pprint
import logging
import uuid


import numpy as np
import numpy.linalg as nl
import numpy.testing as nptest
import scipy.optimize as so
import Bio.PDB as bp
import Bio.PDB as bpdb
import Bio.PDB.PDBExceptions

from logging_exceptions import log_to_exception

import forgi.threedee.utilities.average_stem_vres_atom_positions as ftus
import forgi.utilities.debug as fud
import forgi.threedee.utilities.my_math as ftum
import forgi.threedee.utilities.pdb as ftup
import forgi.threedee.utilities.vector as cuv
import forgi.threedee.utilities.vector as ftuv
import forgi
from forgi.threedee.utilities.modified_res import change_residue_id
from forgi.utilities.exceptions import CgConstructionError
from forgi.threedee.utilities.pdb import AtomName
log = logging.getLogger(__name__)

REFERENCE_CATOM = AtomName("C1'")


try:
    profile  # The @profile decorator from line_profiler (kernprof)
except NameError:
    # Not running under kernprof: ``profile`` is not defined, so provide a
    # no-op stand-in that keeps the ``@profile`` decorations in this module
    # valid. Only NameError is caught so that unrelated errors (e.g.
    # KeyboardInterrupt) are never swallowed.
    def profile(x):
        """
        Fallback no-op decorator used when line_profiler is not active.

        :param x: The object (normally a function) being decorated.
        :return: `x` itself, unchanged.
        """
        return x
def stem_stem_orientation(cg, s1, s2):
    '''
    Describe how stem s2 is positioned and oriented relative to stem s1.

    Both stems are treated as line segments given by their two
    coarse-grained coordinates.

    :param cg: The coarse-grained RNA that provides `cg.coords` for both stems.
    :param s1: The name of the first (reference) stem
    :param s2: The name of the second stem
    :return: A 6-tuple `(dist, ang1, ang2, axis_angle, lateral_offset,
             ortho_offset)`:

             * dist: length of the vector connecting the two points returned
               by `line_segment_distance` for the two stems.
             * ang1: angle between the axis of s1 and the axis of s2 after
               rejecting the latter from the normal of the plane spanned by
               the s1 axis and the connecting vector.
             * ang2: angle between the axis of s1 and the axis of s2 after
               rejecting the latter from the part of the connecting vector
               that is orthogonal to the s1 axis.
             * axis_angle: the plain angle between the two stem axes.
             * lateral_offset: the component of the connecting vector along
               the s1 axis (computed via Pythagoras).
             * ortho_offset: the component of the connecting vector
               orthogonal to the s1 axis.
    '''
    start1, end1 = cg.coords[s1][0], cg.coords[s1][1]
    start2, end2 = cg.coords[s2][0], cg.coords[s2][1]

    # Axis vectors of the two cylinders.
    axis1 = end1 - start1
    axis2 = end2 - start2

    # Vector between the closest points of the two line segments.
    closest1, closest2 = cuv.line_segment_distance(start1, end1, start2, end2)
    connection = closest2 - closest1
    connection_rej = cuv.vector_rejection(connection, axis1)
    plane_normal = np.cross(connection_rej, axis1)

    # Part of the s2 axis inside / outside of the intersection plane.
    in_plane = cuv.vector_rejection(axis2, plane_normal)
    out_of_plane = cuv.vector_rejection(axis2, connection_rej)

    ang1 = cuv.vec_angle(in_plane, axis1)
    ang2 = cuv.vec_angle(out_of_plane, axis1)

    closest_dist = cuv.magnitude(connection)
    # Ever so slightly increased, so rounding can never make the argument
    # of the square root below negative.
    padded_dist = closest_dist + 0.0001
    ortho_offset = cuv.magnitude(connection_rej)
    lateral_offset = math.sqrt(padded_dist * padded_dist
                               - ortho_offset * ortho_offset)

    return (closest_dist, ang1, ang2, cuv.vec_angle(axis1, axis2),
            lateral_offset, ortho_offset)
def base_normals(pdb_filename):
    '''
    Return an (origin, normal) pair for every residue of the first chain.

    The normal is the average of two normalized cross products:
    (C6-C2) x (C5-C2) and (C6-N3) x (C5-N3). The origin is the centroid of
    the four atoms C2, C5, C6 and N3.

    :param pdb_filename: The name of the pdb file containing the structure
    :return: A list of `(origin, normal)` tuples, one per residue.
    '''
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        struct = bp.PDBParser().get_structure('t', pdb_filename)

    # Only the first chain of the structure is used.
    first_chain = list(struct.get_chains())[0]

    result = []
    for residue in first_chain:
        c2, c5, c6, n3 = [residue[name].get_vector().get_array()
                          for name in ('C2', 'C5', 'C6', 'N3')]

        normal_from_c2 = cuv.normalize(np.cross(c6 - c2, c5 - c2))
        normal_from_n3 = cuv.normalize(np.cross(c6 - n3, c5 - n3))

        # Average of the two estimates of the base normal.
        normal = (normal_from_c2 + normal_from_n3) / 2.
        centroid = (c2 + c5 + c6 + n3) / 4.
        result.append((centroid, normal))
    return result
def get_twist_angle(coords, twists):
    '''
    Return the angle between the two twist vectors of a stem.

    The second twist is expressed in the orthonormal basis created from the
    stem axis and the first twist; the angle is then read off its second and
    third component.

    :param coords: The coordinates of the two ends of the stem.
    :param twists: The two twist vectors.
    :return: The angle (radians, as returned by `math.atan2`).
    '''
    axis = coords[1] - coords[0]
    stem_basis = cuv.create_orthonormal_basis(axis, twists[0])
    second_twist = cuv.change_basis(twists[1], stem_basis, cuv.standard_basis)
    return math.atan2(second_twist[2], second_twist[1])
def twist2_from_twist1(stem_vec, twist1, angle):
    '''
    Construct the second twist vector at a given angle from the first one.

    :param stem_vec: The vector of the stem.
    :param twist1: The vector of the first twist.
    :param angle: The angular difference between the two twists (radians).
    :return: The second twist vector in the standard coordinate system.
    '''
    stem_basis = cuv.create_orthonormal_basis(stem_vec, twist1)
    # In the stem's own basis the twist has no component along the first
    # basis vector.
    in_stem_basis = np.array([0., math.cos(angle), math.sin(angle)])
    return np.dot(stem_basis.transpose(), in_stem_basis)
def get_twist_parameter(twist1, twist2, u_v):
    '''
    Calculate how much stem1 must be twisted for its twist vector to
    coincide with that of stem2.

    :param twist1: The twist notator of stem1 (not used in the calculation).
    :param twist2: The twist notator of stem2
    :param u_v: A pair `(u, v)`: the parameters for rotating stem2 onto stem1
    :return: The twist parameter (radians, as returned by `math.atan2`).
    '''
    u, v = u_v
    around_z = cuv.rotation_matrix("z", v)
    around_y = cuv.rotation_matrix("y", u - math.pi / 2.)

    # First rotate around z, then around y.
    rotated = np.dot(around_z, twist2)
    rotated = np.dot(around_y, rotated)
    return math.atan2(rotated[2], rotated[1])
def get_stem_orientation_parameters(stem1_vec, twist1, stem2_vec, twist2):
    '''
    Parameterize the orientation of stem2 with respect to stem1.

    stem1 -> bulge -> stem2

    :param stem1_vec: The vector representing the axis of stem1
    :param twist1: The twist of stem1 closest to the bulge
    :param stem2_vec: The vector representing the axis of stem2
    :param twist2: The twist of stem2 closest to the bulge
    :returns: (r,u,v,t) where r,u,v = the stem orientation in polar
              coordinates and t is the twist parameter.
    '''
    # Everything is expressed in a coordinate system attached to stem1.
    basis1 = cuv.create_orthonormal_basis(stem1_vec, twist1)
    log.debug("Stem1 basis \n%s", basis1)

    stem2_local = cuv.change_basis(stem2_vec, basis1, cuv.standard_basis)
    log.debug("Stem2 in basis of stem 1 %s", stem2_local)
    twist2_local = cuv.change_basis(twist2, basis1, cuv.standard_basis)

    # Cartesian -> polar coordinates
    r, u, v = cuv.spherical_cartesian_to_polar(stem2_local)
    t = get_twist_parameter(twist1, twist2_local, (u, v))
    log.debug("r %s, u %s, v %s, t %s", r, u, v, t)
    return (r, u, v, t)
def get_stem_separation_parameters(stem, twist, bulge):
    '''
    Parameterize the location of the bulge with respect to the stem.

    :param stem: The stem vector.
    :param twist: The twist vector that, together with the stem vector,
                  defines the stem's coordinate system.
    :param bulge: the bulge vector.
    :return: The bulge vector in polar coordinates, expressed in the
             coordinate system of the stem.
    '''
    basis = cuv.create_orthonormal_basis(stem, twist)
    bulge_local = cuv.change_basis(bulge, basis, cuv.standard_basis)
    return cuv.spherical_cartesian_to_polar(bulge_local)
def get_stem_twist_and_bulge_vecs(bg, bulge, connections):
    '''
    Return the stem, twist and bulge vectors needed to calculate the angles
    between two stems that are connected by a bulge.

    The vectors follow the path

        s1e -> s1b -> b -> s2b -> s2e

    where `b` marks the side of a stem that is attached to the bulge and `e`
    the opposite side. The twists are the ones on the bulge side of each stem.

    :param bg: The object providing `coords`, `twists` and `get_sides`.
    :param bulge: The name of the bulge separating the two helices.
    :param connections: The two stems that are connected to this bulge.
    :return: (stem1, twist1, stem2, twist2, bulge)
    '''
    stem1_name, stem2_name = connections[0], connections[1]

    ends1, twists1 = bg.coords[stem1_name], bg.twists[stem1_name]
    ends2, twists2 = bg.coords[stem2_name], bg.twists[stem2_name]

    # Which side of each stem touches the bulge? The results are used as
    # indices into the coordinate and twist containers.
    near1, far1 = bg.get_sides(stem1_name, bulge)
    log.debug("Side of 1st stem %s attached to %s is %s ",
              stem1_name, bulge, near1)
    near2, far2 = bg.get_sides(stem2_name, bulge)
    log.debug("Side of 2nd stem %s attached to %s is %s ",
              stem2_name, bulge, near2)

    # stem1 points towards the bulge, stem2 points away from it.
    stem1_vec = ends1[near1] - ends1[far1]
    bulge_vec = ends2[near2] - ends1[near1]
    stem2_vec = ends2[far2] - ends2[near2]

    return (stem1_vec, twists1[near1], stem2_vec, twists2[near2], bulge_vec)
def stem2_pos_from_stem1(stem1, twist1, params):
    '''
    Get the starting point of a second stem, given the parameters about
    where it is located with respect to stem1.

    :param stem1: The vector representing the axis of stem1's cylinder
    :param twist1: The twist parameter of stem1
    :param params: A triple `(r, u, v)` describing the position of stem2
                   wrt stem1 in polar coordinates.
    :return: The corresponding cartesian vector in the standard coordinate
             system.
    '''
    r, u, v = params
    local_offset = cuv.spherical_polar_to_cartesian((r, u, v))
    basis1 = cuv.create_orthonormal_basis(stem1, twist1)
    return np.dot(basis1.transpose(), local_offset)
def stem2_pos_from_stem1_1(transposed_stem1_basis, params):
    '''
    Get the starting point of a second stem, given the parameters about
    where it is located with respect to stem1.

    The parameters are polar coordinates in the coordinate system of stem1.
    This function converts them to a cartesian vector in the standard
    coordinate system.

    :param transposed_stem1_basis: The transposed basis of the first stem.
    :param params: A triple `(r, u, v)` describing the position of stem2
                   wrt stem1.
    :return: The cartesian vector in standard coordinates pointing from
             stem1 to stem2.
    '''
    r, u, v = params
    local_offset = cuv.spherical_polar_to_cartesian((r, u, v))
    return np.dot(transposed_stem1_basis, local_offset)
# Seems to be unused!
def twist2_orient_from_stem1(stem1, twist1, u_v_t):
    '''
    Calculate the twist vector of the 2nd stem from its parameters and the
    first stem.

    :param stem1: The vector representing the axis of stem1's cylinder
    :param twist1: The twist factor of stem1.
    :param u_v_t: The parameters describing how the twist of stem2 is
                  oriented with respect to stem1. A triple `(u, v, t)`
    :return: The twist vector of stem2 after the final change of basis.
    '''
    u, v, t = u_v_t
    local_twist = np.array([0., math.cos(t), math.sin(t)])

    around_z = cuv.rotation_matrix("z", v)
    around_y = cuv.rotation_matrix("y", u - math.pi / 2.)
    combined = np.dot(around_y, around_z)

    # Undo the combined rotation.
    local_twist = np.dot(nl.inv(combined), local_twist)

    basis1 = cuv.create_orthonormal_basis(stem1, twist1)
    return cuv.change_basis(local_twist, cuv.standard_basis, basis1)
def twist2_orient_from_stem1_1(stem1_basis, u_v_t):
    '''
    Calculate the twist vector of the 2nd stem from its parameters and a
    precomputed basis matrix of the first stem.

    :param stem1_basis: The matrix the rotated twist is multiplied with
                        from the left (`_virtual_stem_from_bulge` passes
                        the transposed basis of stem1).
    :param u_v_t: The parameters describing how the twist of stem2 is
                  oriented with respect to stem1. A triple `(u, v, t)`
    :return: The twist vector of stem2.
    '''
    u, v, t = u_v_t
    local_twist = np.array([0., math.cos(t), math.sin(t)])

    around_z = cuv.rotation_matrix("z", v)
    around_y = cuv.rotation_matrix("y", u - math.pi / 2.)
    combined = np.dot(around_y, around_z)

    # The transpose is used in place of the inverse of the rotation matrix.
    local_twist = np.dot(combined.T, local_twist)
    return np.dot(stem1_basis, local_twist)
def stem2_orient_from_stem1(stem1, twist1, r_u_v):
    '''
    Calculate the orientation of the second stem, given its
    parameterization and the axis and twist of stem1.

    :param stem1: The vector representing the axis of stem1's cylinder
    :param twist1: The twist factor of stem1.
    :param r_u_v: The orientation of stem2 wrt stem1, a triple `(r, u, v)`
    :return: The vector of stem2 in the standard coordinate system.
    '''
    basis1 = cuv.create_orthonormal_basis(stem1, twist1)
    transposed_basis1 = basis1.transpose()
    return stem2_orient_from_stem1_1(transposed_basis1, r_u_v)
def stem2_orient_from_stem1_1(stem1_basis, r_u_v):
    '''
    Calculate the orientation of the second stem, given its
    parameterization and a precomputed basis matrix of stem1.

    :param stem1_basis: The matrix the local stem vector is multiplied with
                        from the left (`stem2_orient_from_stem1` passes the
                        transposed basis of stem1).
    :param r_u_v: The orientation of stem2 wrt stem1, a triple `(r, u, v)`
    :return: The vector of stem2.
    '''
    r, u, v = r_u_v
    local_stem2 = cuv.spherical_polar_to_cartesian((r, u, v))
    return np.dot(stem1_basis, local_stem2)
def get_angle_stat_geometry(stem1_vec, twist1, stem2_vec, twist2, bulge_vec):
    """
    Calculate the six parameters describing the geometry of two stems that
    are connected by a bulge.

    stem1 points towards the bulge, the bulge vector leads from stem1 to
    stem2, and stem2 points away from the bulge.

    :param stem1_vec: The vector of the first stem, pointing TOWARDS the bulge
    :param twist1: The twist vector at the side of stem1 closest to the bulge
    :param stem2_vec: The vector of the second stem, pointing AWAY FROM the
                      bulge
    :param twist2: The twist vector at the side of stem2 closest to the bulge
    :param bulge_vec: The vector from stem1 to stem2
    :returns: A 6-tuple: u,v (the orientation parameters), t (twist
              parameter) and r1, u1, v1 (the separation parameters)
    :raises ZeroDivisionError: Re-raised, after an error was logged, if one
                               of the two parameterizations raises it.
    """
    try:
        # The radial part of the orientation is not part of the result.
        _, u, v, t = get_stem_orientation_parameters(
            stem1_vec, twist1, stem2_vec, twist2)
        separation = get_stem_separation_parameters(
            stem1_vec, twist1, bulge_vec)
        r1, u1, v1 = separation
    except ZeroDivisionError as e:
        with log_to_exception(log, e):
            log.error("Cannot get stat. The 3D coodinates are probably wrong.")
        raise
    return u, v, t, r1, u1, v1
@profile
def get_broken_ml_deviation(cg, broken_ml_name, fixed_stem_name, virtual_stat,
                            plot=False):
    """
    If we assign a stat to a broken ml-segment, how much would the attached
    stem deviate from its true location?

    Calculates the position of a "virtual stem", which would be placed after
    the broken ml-segment, if it was a true ml-segment with the virtual stat
    assigned. Then calculates the deviation between this virtual stem and
    the actual stem.

    :param cg: The CoarseGrainRNA
    :param broken_ml_name: The name of the ml-segment of interest. It should
                           not be part of cg.mst.
    :param fixed_stem_name: The name of a stem (e.g. "s0") attached to the
                            broken ml segment. This stem is used as
                            reference. For the other stem attached to the
                            broken ml-segment (the "original" stem), the
                            virtual stem position is calculated.
    :param virtual_stat: The stat assigned to the broken ml segment, in the
                         direction from fixed_stem_name to the other stem.
    :param plot: Debugging aid, defaults to False. If True, a 2D matplotlib
                 plot of the junction, the true bulge and the virtual
                 bulge/stem is shown (this blocks in `plt.show()`), and the
                 deviations are additionally logged at level INFO.
                 This replaces a block that was previously disabled with
                 `if False:`.
    :returns: A triple: positional_deviation, angular_deviation and
              twist_deviation.
              positional_deviation measures how far the start of the virtual
              stem is from the start of the true ("original") stem.
              angular_deviation measures (in radians) the difference in the
              stems' orientation.
              twist_deviation measures the angle (in radians) between the
              two stems' twist vectors.
    :raises ValueError: If `fixed_stem_name` is not one of the two stems
                        attached to `broken_ml_name`.
    """
    log.debug("Getting broken ML deviation for %s attached to %s "
              "using stat %s", broken_ml_name, fixed_stem_name,
              virtual_stat.pdb_name)
    s1, s2 = cg.edges[broken_ml_name]
    if s1 == fixed_stem_name:
        orig_stem_name = s2
    elif s2 == fixed_stem_name:
        orig_stem_name = s1
    else:
        raise ValueError("fixed stem {} is not attached to ml {} with "
                         "edges {}".format(fixed_stem_name, broken_ml_name,
                                           [s1, s2]))

    sides = cg.get_sides(fixed_stem_name, broken_ml_name)
    fixed_s_vec = cg.coords.get_direction(fixed_stem_name)
    if sides[0] == 0:
        # The stem direction is flipped if side 0 of the fixed stem is the
        # one attached to the broken ml segment.
        fixed_s_vec = -fixed_s_vec
    s_twist = cg.twists[fixed_stem_name][sides[0]]
    fixed_stem_basis = ftuv.create_orthonormal_basis(fixed_s_vec, s_twist)

    vbulge_vec, vstem_vec, vstem_twist = _virtual_stem_from_bulge(
        fixed_stem_basis, virtual_stat)
    # Scale the virtual stem vector. The angle calculated below does not
    # depend on its length; the length only matters for the optional plot.
    vstem_vec *= 5
    fixed_attachment = cg.coords[fixed_stem_name][sides[0]]
    vstem_coords0 = fixed_attachment + vbulge_vec
    vstem_coords1 = vstem_coords0 + vstem_vec

    sides2 = cg.get_sides(orig_stem_name, broken_ml_name)
    orig_coords0 = cg.coords[orig_stem_name][sides2[0]]
    orig_coords1 = cg.coords[orig_stem_name][sides2[1]]
    orig_stem_vec = orig_coords1 - orig_coords0

    pos_dev = ftuv.vec_distance(orig_coords0, vstem_coords0)
    ang_dev = ftuv.vec_angle(vstem_vec, orig_stem_vec)
    twist_dev = ftuv.vec_angle(
        cg.twists[orig_stem_name][sides2[0]], vstem_twist)
    log.debug("Deviation: pos %s, orient %s, twist: %s",
              pos_dev, math.degrees(ang_dev), math.degrees(twist_dev))

    if plot:
        # Plotting code used for debugging.
        import matplotlib.pyplot as plt

        true_bulge_vec = orig_coords0 - fixed_attachment
        pos_adev = ftuv.vec_angle(true_bulge_vec, vbulge_vec)
        log.info("Deviation: pos %s, %s orient %s, twist: %s", pos_dev,
                 math.degrees(pos_adev), math.degrees(ang_dev),
                 math.degrees(twist_dev))
        log.info("Length: virtual: %s original: %s",
                 ftuv.magnitude(vstem_vec), ftuv.magnitude(orig_stem_vec))
        _plot_junction_2d(cg, broken_ml_name)
        plt.plot([fixed_attachment[0], orig_coords0[0]],
                 [fixed_attachment[1], orig_coords0[1]],
                 ".-", label="true bulge")
        plt.plot([fixed_attachment[0], vstem_coords0[0]],
                 [fixed_attachment[1], vstem_coords0[1]],
                 ".-", label="virtual bulge")
        plt.plot([vstem_coords0[0], vstem_coords1[0]],
                 [vstem_coords0[1], vstem_coords1[1]],
                 "s-", label="virtual" + orig_stem_name)
        plt.legend()
        plt.show()

    return pos_dev, ang_dev, twist_dev
def _plot_element(cg, elem, style="o-", name_suffix=""):
    """
    Plot the first two coordinate components of a coarse-grained element
    with matplotlib.

    :param cg: The object providing `cg.coords`.
    :param elem: The name of the element to plot.
    :param style: The matplotlib format string for the line.
    :param name_suffix: A string appended to the element name in the legend.
    """
    import matplotlib.pyplot as plt
    start, end = cg.coords[elem][0], cg.coords[elem][1]
    xs = [start[0], end[0]]
    ys = [start[1], end[1]]
    plt.plot(xs, ys, style, label=elem + name_suffix)


def _plot_junction_2d(cg, broken_ml):
    """
    Plot the junction containing the given broken ml segment in 2D.

    Starting from `broken_ml`, the ml segments are visited via
    `cg.get_next_ml_segment` until `broken_ml` is reached again. Every
    visited segment and every stem attached to it is plotted once.

    TODO: Move this to a proper location

    :param cg: The coarse-grained RNA.
    :param broken_ml: The name of the broken ml segment.
    """
    import matplotlib.pyplot as plt
    seen_stems = set()
    _plot_element(cg, broken_ml, name_suffix=" broken")
    current = cg.get_next_ml_segment(broken_ml)
    while current != broken_ml:
        _plot_element(cg, current)
        for stem in cg.edges[current]:
            if stem in seen_stems:
                continue
            _plot_element(cg, stem)
            seen_stems.add(stem)
        current = cg.get_next_ml_segment(current)


def _virtual_stem_from_bulge(prev_stem_basis, stat):
    """
    Return a virtual stem with length 1 that would be placed by stat and
    prev_stem

    :param prev_stem_basis: The basis of the previous stem.
    :param stat: The angle stat that describes the orientation of the
                 virtual stem from the previous stem.
    :return: A triple `(start_location, stem_orientation, twist)`.
    """
    basis_t = prev_stem_basis.transpose()
    bulge_vec = stem2_pos_from_stem1_1(basis_t, stat.position_params())
    # A radius of 1 is prepended to the orientation parameters.
    orientation_params = [1] + list(stat.orientation_params())
    stem_vec = stem2_orient_from_stem1_1(basis_t, orientation_params)
    twist_vec = twist2_orient_from_stem1_1(basis_t, stat.twist_params())
    return bulge_vec, stem_vec, twist_vec
def get_centroid(chain, residue_num):
    """
    Return the centroid of the reference atoms (`REFERENCE_CATOM`) of the
    given residues.

    Residues for which the lookup raises a KeyError are skipped.

    :param chain: The chain, indexed by residue number and then atom name.
    :param residue_num: A list of integers (or values convertible by `int`)
    :return: The result of `cuv.get_vector_centroid` for the found atoms.
    """
    residue_numbers = [int(number) for number in residue_num]

    reference_atoms = []
    for number in residue_numbers:
        try:
            reference_atoms.append(chain[number][REFERENCE_CATOM])
        except KeyError:
            # the reference atom probably doesn't exist
            pass

    positions = [atom.get_vector().get_array() for atom in reference_atoms]
    return cuv.get_vector_centroid(positions)
# Seems to be unused!
def get_bulge_centroid(chain, define):
    """
    Return the centroid of all residues covered by a define.

    :param chain: The chain passed on to `get_centroid`.
    :param define: A flat sequence of (start, end) pairs:
                   `[start1, end1, start2, end2, ...]`. Both ends of a pair
                   are included.
    :return: The centroid as returned by `get_centroid`.
    """
    residue_numbers = []
    for start_index in range(0, len(define), 2):
        first = int(define[start_index])
        last = int(define[start_index + 1])
        residue_numbers.extend(range(first, last + 1))
    return get_centroid(chain, residue_numbers)
def get_furthest_c_alpha(cg, chain, stem_end, d):
    '''
    Get the position of the reference atom (`REFERENCE_CATOM`) of element
    `d` which is furthest from the end of the stem.

    :param cg: The coarse-grained RNA, used to look up the residues of `d`.
    :param chain: The chain, indexed by residue id and then atom name.
    :param stem_end: The coordinates of the end of the stem.
    :param d: The name of the element.
    :return: The position of the furthest atom, or None if no atom was found.
    '''
    best_dist = 0
    best_pos = None

    # The seq_ids are pairs of chain id and residue id.
    for _chain_id, res_id in it.chain.from_iterable(
            cg.get_resseqs(d, seq_ids=True)):
        try:
            atom_pos = chain[res_id][REFERENCE_CATOM].get_vector().get_array()
        except KeyError:
            print("Nucleotide %s missing in element %s" %
                  (str(res_id), d), file=sys.stderr)
            continue

        dist = cuv.magnitude(stem_end - atom_pos)
        if dist >= best_dist:
            best_dist, best_pos = dist, atom_pos

    return best_pos
def stem_from_chains(cg, chains, elem_name, verify=False):
    """
    Calculate the coordinates and the twists of a stem by superimposing an
    ideal helix template onto the stem's residues.

    This function combines get_mids and get_twists into one more efficient
    routine.

    :param cg: The coarse-grained RNA.
    :param chains: A dictionary {chain_id: Biopython_PDB_chain}
    :param elem_name: e.g. "s0"
    :param verify: Debugging aid, defaults to False. If True,
                   `verify_vatom_positions` is called to plot the result
                   (this opens a matplotlib window). This replaces a call
                   that was previously disabled with `if False:`.
    :return: A tuple `(coords, twists)`: the start and end point of the
             stem axis and the two normalized twist vectors.
    :raises CgConstructionError: If the template file cannot be read and the
                                 stem is longer than 40 base-pairs.
    """
    stem_length = cg.stem_length(elem_name)
    template_filename = 'ideal_1_%d_%d_%d.pdb' % (
        stem_length, stem_length + 1, stem_length * 2)
    filename = forgi.threedee.data_file(op.join('data', template_filename))
    try:
        ideal_chain = ftup.get_first_chain(filename)
    except IOError:
        if stem_length > 40:
            raise CgConstructionError("Cannot create coordinates. "
                                      "Helices with lengths greater than 40 are currently not supported in forgi.")
        else:
            raise

    try:
        residue_ids = cg.get_resseqs(elem_name, seq_ids=True)
    except IndexError as e:
        with log_to_exception(log, e):
            log.error("seq_ids were '%r'", cg.seq_ids)
        raise

    # Collect copies of all stem residues in one temporary chain.
    stem_chain = bpdb.Chain.Chain(' ')
    for strand in residue_ids:
        for res_id in strand:
            log.debug("Adding residue %s", res_id)
            residue = chains[res_id.chain][res_id.resid].copy()
            try:
                stem_chain.add(residue)
            except Bio.PDB.PDBExceptions.PDBConstructionException:
                log.info(
                    "Temporarily changing resid %s to uuid, because this id is present twice (with different chain) in one stem", residue.id)
                change_residue_id(residue, uuid.uuid4())
                stem_chain.add(residue)

    rotran = ftup.pdb_rmsd(stem_chain, ideal_chain, sidechains=False,
                           superimpose=True)[2]

    # average length of a base-pair: 2.547
    mult = 0.01  # Stems with 1 bp have a tiny length
    ideal_coords = np.array([[0., 0., mult],
                             np.array([0., 0., -mult]) + (stem_length - 1) * np.array([0., 0., -2.547])])
    # Move the axis of the ideal helix with the rotation and translation
    # returned by the superposition.
    coords = np.dot(ideal_coords, rotran[0]) + rotran[1]
    stem_direction = coords[1] - coords[0]

    def reference_offset(res_id, axis_point):
        # Vector from a point on the stem axis to the residue's reference atom.
        residue = chains[res_id.chain][res_id.resid]
        try:
            return residue[REFERENCE_CATOM].coord - axis_point
        except KeyError:
            log.error("Atoms are %s", residue.child_dict)
            raise

    # The start of the stem: the first nucleotide of the first strand
    # and the last nucleotide of the second strand.
    start_vec1 = reference_offset(residue_ids[0][0], coords[0])
    start_vec1a = reference_offset(residue_ids[1][-1], coords[0])
    # The end of the stem: the last nucleotide of the first strand
    # and the first nucleotide of the second strand.
    end_vec1 = reference_offset(residue_ids[0][-1], coords[1])
    end_vec1a = reference_offset(residue_ids[1][0], coords[1])

    # Only the components orthogonal to the stem axis are of interest.
    notch1 = cuv.vector_rejection(start_vec1, stem_direction)
    notch2 = cuv.vector_rejection(end_vec1, stem_direction)
    notch1a = cuv.vector_rejection(start_vec1a, stem_direction)
    notch2a = cuv.vector_rejection(end_vec1a, stem_direction)

    twists = (cuv.normalize(notch1 + notch1a),
              cuv.normalize(notch2 + notch2a))

    if verify:
        verify_vatom_positions(residue_ids, chains, coords, twists,
                               "stem_{}_from_chain".format(elem_name))

    return coords, twists
def verify_vatom_positions(residue_ids, chains, coords, twists, label=""):
    """
    Show a 3D plot for the visual verification of the coordinates, twists
    and virtual residues of a single stem. This is a debugging aid.

    Plotted are: the C1' atoms of both strands, the first and the last
    base-pair, the stem axis, both twist vectors and one "virtual C1'"
    position per nucleotide of the first strand.

    :param residue_ids: The residue ids of the two strands of the stem.
    :param chains: A dictionary {chain_id: Biopython_PDB_chain}
    :param coords: The coords of ONE stem
    :param twists: The two twist vectors of this stem.
    :param label: The title of the plot.
    """
    # Importing Axes3D registers the "3d" projection on old matplotlib
    # versions. The axes are created via `add_subplot`, because
    # `Axes3D(fig)` no longer attaches the axes to the figure in current
    # matplotlib versions.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    import matplotlib.pyplot as plt

    def c1_coord(res):
        return chains[res.chain][res.resid]["C1'"].coord

    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")

    def plot_points(points, fmt, legend_label):
        points = np.asarray(points)
        ax.plot(points[:, 0], points[:, 1], points[:, 2], fmt,
                label=legend_label)

    forward_strand, backward_strand = residue_ids[0], residue_ids[1]
    plot_points([c1_coord(r) for r in forward_strand], "o-",
                "forward strand")
    plot_points([c1_coord(r) for r in backward_strand], "o-",
                "backwards strand")

    # The first and the last base-pair of the stem
    plot_points([c1_coord(forward_strand[0]), c1_coord(backward_strand[-1])],
                "--", "bp")
    plot_points([c1_coord(forward_strand[-1]), c1_coord(backward_strand[0])],
                "--", "bp")

    plot_points(coords, "o-", "STEM")
    # Each twist is drawn from the stem end, with a marker at 1 and 10
    # times its length.
    plot_points([coords[0], coords[0] + twists[0],
                 coords[0] + twists[0] * 10], "o-", "Twist1")
    plot_points([coords[1], coords[1] + twists[1],
                 coords[1] + twists[1] * 10], "o-", "Twist2")

    # Virtual atoms
    stem_len = len(forward_strand)
    vres_pos = []
    vres_bases = []
    c1_vecs = []
    for i, res in enumerate(forward_strand):
        pos = virtual_res_3d_pos_core(coords, twists, i, stem_len)[0]
        basis = virtual_res_basis_core(coords, twists, i, stem_len)
        vres_pos.append(pos)
        vres_bases.append(basis)
        # The C1' position in the coordinate system of the virtual residue
        c1_vecs.append(ftuv.change_basis(c1_coord(res) - pos, basis,
                                         ftuv.standard_basis))
    av_c1_vec = np.sum(c1_vecs, axis=0) / len(c1_vecs)

    # Place the average C1' vector at every virtual residue.
    virtual_c1s = [ftuv.change_basis(av_c1_vec, ftuv.standard_basis, basis)
                   + pos
                   for pos, basis in zip(vres_pos, vres_bases)]
    plot_points(virtual_c1s, "o", "virtual C1'")

    ax.set_title(label)
    ax.legend()
    plt.show()
#assert False
def total_helix_rotation(coords, twists, stem_len):
    """
    Calculate the total rotation of the helix in radians from the twists.

    When we calculate the angle between the two twists, we only know the
    true rotation modulo 2*pi (i.e. a rotation of 45 degrees could mean 45
    degrees or 405 degrees). The number of nucleotides and the average
    rotation per base-pair of an ideal helix are used to pick, among all
    rotations compatible with the measured twist angle, the one closest to
    the expected total rotation. This is the same rule that
    `virtual_res_3d_pos_core` applies.

    :param coords: The coordinates of the two ends of the stem.
    :param twists: The two twist vectors of the stem.
    :param stem_len: The number of base-pairs of the stem.
    :return: The total rotation of the helix in radians.
    """
    full_turn = 2 * math.pi
    stem_vec = coords[1] - coords[0]

    # the angle of the second twist with respect to the first
    stem_basis = cuv.create_orthonormal_basis(stem_vec, twists[0])
    t2 = cuv.change_basis(twists[1], stem_basis, cuv.standard_basis)
    twist_angle = ftum.atan3(t2[2], t2[1])

    # calculated from an ideal length 30 helix
    average_ang_per_nt = 0.636738030735
    expected_total_ang = (stem_len - 1) * average_ang_per_nt
    expected_twist_ang = expected_total_ang % full_turn

    # How far the measured angle is ahead of / behind the expected angle.
    # Taking both differences modulo a full turn makes this independent of
    # the interval in which the measured angle is reported.
    forward = (twist_angle - expected_twist_ang) % full_turn
    backward = (expected_twist_ang - twist_angle) % full_turn

    if forward < backward:
        return expected_total_ang + forward
    return expected_total_ang - backward
def virtual_res_3d_pos_core(coords, twists, i, stem_len, stem_inv=None):
    '''
    Calculate the virtual position of the i'th nucleotide in the stem.

    The virtual position extrapolates the position of the residues based on
    the twists of the helix.

    :param coords: The coordinates of the two ends of the stem.
    :param twists: The two twist vectors of the stem.
    :param i: The index of the nucleotide in the stem.
    :param stem_len: The length of the stem.
    :param stem_inv: Optional. If given, the second twist is transformed by
                     multiplying it with this matrix instead of creating the
                     basis of the stem.
    :return: A 4-tuple: the point located on the axis of the stem, a vector
             away from that point in the direction of the residue, and the
             same vector rotated by +0.9 and by -0.9 radians.
    '''
    full_turn = 2 * math.pi
    axis = coords[1] - coords[0]

    # the position of the virtual residue along the axis of the stem
    if stem_len == 1:
        axis_pos = coords[0]
    else:
        axis_pos = coords[0] + (i / float(stem_len - 1)) * axis

    # the angle of the second twist with respect to the first
    if stem_inv is None:
        basis = cuv.create_orthonormal_basis(axis, twists[0])
        twist_end = cuv.change_basis(twists[1], basis, cuv.standard_basis)
    else:
        twist_end = np.dot(stem_inv, twists[1])
    measured = ftum.atan3(twist_end[2], twist_end[1])

    # calculated from an ideal length 30 helix
    average_ang_per_nt = 0.636738030735
    expected_total = (stem_len - 1) * average_ang_per_nt

    # Reduce the expected rotation to a single turn.
    expected_in_turn = expected_total
    while expected_in_turn - full_turn > 0:
        expected_in_turn -= full_turn

    # How far is the measured angle ahead of / behind the expected one?
    if measured < expected_in_turn:
        forward = full_turn + measured - expected_in_turn
        backward = expected_in_turn - measured
    else:
        forward = measured - expected_in_turn
        backward = full_turn + expected_in_turn - measured

    # The total rotation is the one closest to the expected rotation.
    if forward < backward:
        total = expected_total + forward
    else:
        total = expected_total - backward

    if stem_len == 1:
        ang = 0.
    else:
        ang = (total / float(stem_len - 1)) * i

    # the basis vectors of the circle on which the virtual residues lie
    u = twists[0]
    v = cuv.normalize(np.cross(axis, twists[0]))
    ang_offset = 0.9

    def on_circle(angle):
        # equation for a circle in 3-space
        return u * math.cos(angle) + v * math.sin(angle)

    return (axis_pos,
            on_circle(ang),
            on_circle(ang + ang_offset),
            on_circle(ang - ang_offset))
def virtual_res_3d_pos(bg, stem, i, stem_inv=None, stem_length=None):
    '''
    Calculate the virtual position of the i'th nucleotide of a stem.

    A convenience wrapper around `virtual_res_3d_pos_core` that takes the
    coordinates and twists from `bg`.

    :param bg: The object providing `coords`, `twists` and `stem_length`.
    :param stem: The name of the stem.
    :param i: The index of the nucleotide in the stem.
    :param stem_inv: Passed on to `virtual_res_3d_pos_core`.
    :param stem_length: The length of the stem. If None,
                        `bg.stem_length(stem)` is used.
    :return: The result of `virtual_res_3d_pos_core`.
    '''
    if stem_length is None:
        stem_length = bg.stem_length(stem)
    return virtual_res_3d_pos_core(bg.coords[stem], bg.twists[stem], i,
                                   stem_length, stem_inv)
def virtual_res_basis_core(coords, twists, i, stem_len, vec=None):
    '''
    Define a basis based on the location of a virtual stem residue.

    The basis is defined by the direction of the stem and the direction of
    the virtual residue.

    :param coords: The coordinates of the two ends of the stem.
    :param twists: The two twist vectors of the stem.
    :param i: The i'th residue of the stem
    :param stem_len: The length of the stem.
    :param vec: The direction of the virtual residue. If None, it is
                calculated with `virtual_res_3d_pos_core`.
    :return: The basis created by `cuv.create_orthonormal_basis` from the
             stem direction and the direction of the virtual residue.
    '''
    if vec is None:
        _pos, vec, _vec_l, _vec_r = virtual_res_3d_pos_core(
            coords, twists, i, stem_len)
    axis = coords[1] - coords[0]
    return cuv.create_orthonormal_basis(axis, vec)
def virtual_res_basis(bg, stem, i, vec=None):
    '''
    Define a basis based on the location of the i'th virtual residue of a
    stem.

    A convenience wrapper around `virtual_res_basis_core` that takes the
    coordinates, twists and stem length from `bg`.

    :param bg: The object providing `coords`, `twists` and `stem_length`.
    :param stem: The name of the stem
    :param i: The i'th residue of the stem
    :param vec: Passed on to `virtual_res_basis_core`.
    :return: The result of `virtual_res_basis_core`.
    '''
    stem_coords = bg.coords[stem]
    stem_twists = bg.twists[stem]
    length = bg.stem_length(stem)
    return virtual_res_basis_core(stem_coords, stem_twists, i, length, vec)
def pos_to_spos(bg, s1, i1, s2, i2):
    '''
    Convert the location of s2, i2 into the coordinate system defined by
    (s1, i1)

    The converted location is the point 7 units away from the stem axis of
    s2, in the direction of the virtual residue i2. It is taken relative to
    the position of virtual residue i1 on the axis of s1.

    :param bg: The BulgeGraph containing the stems
    :param s1: The basis stem name
    :param i1: The basis res position
    :param s2: The stem containing the nucleotide to be converted
    :param i2: The nucleotide to be converted position
    :return: The location in the basis of the virtual residue (s1, i1).
    '''
    basis = virtual_res_basis(bg, s1, i1)
    origin, _, _, _ = virtual_res_3d_pos(bg, s1, i1)
    target_axis_pos, target_vec, _, _ = virtual_res_3d_pos(bg, s2, i2)

    relative_pos = (target_axis_pos + 7. * target_vec) - origin
    return cuv.change_basis(relative_pos, basis, cuv.standard_basis)
""" def spos_to_pos(bg, stem, i, spos): ''' Convert the location of spos from the coordinate system of (stem, i) into the standard coordinate system. :param bg: The BulgeGraph :param stem: The name of the stem in the BulgeGraph :param i: The i'th residue in 'stem' which will define the coordinate system :param spos: The position in the alternate coordinate system :return: The coordinates in the cartesian coordinate system of the rest of the model. ''' if stem in bg.vbases and i in bg.vbases[stem]: sbasis=bg.vbases[stem][i] else: sbasis = virtual_res_basis(bg, stem, i) pos = cuv.change_basis(spos, cuv.standard_basis, sbasis) try: (s1_pos, s1_vec, s1_vec_l, s1_vec_r) = bg.v3dposs[stem][i] except KeyError as e: log.info("in spos_to_pos: KeyError {}. Adding virtual residues for stem {}".format(e, stem)) add_virtual_residues(bg, stem) (s1_pos, s1_vec, s1_vec_l, s1_vec_r) = bg.v3dposs[stem][i] #return pos + (s1_pos + s1_vec) #TODO BT: THIS SEEMS WRONG return pos + s1_pos """
def get_residue_type(i, stem_len):
    '''
    Classify a nucleotide according to its position within the stem.

    The idea is that the distribution of surrounding nucleotides can be
    conditioned on the type of nucleotide: nucleotides at the end of a stem
    may have other stem nucleotides in the direction of the stem vector,
    whereas nucleotides in the middle should not, due to the excluded volume
    of the stem they occupy.

    Currently all positions get the same type, 0.

    :param i: The position of the nucleotide.
    :param stem_len: The length of the stem.
    :return: The type of nucleotide position (always 0).
    '''
    assert i < stem_len
    residue_type = 0
    return residue_type
def junction_virtual_res_distance(bg, bulge):
    '''
    Compute the distance between the two virtual residues flanking a bulge
    region.

    For both virtual residues, the point 7 units away from the stem axis in
    the direction of the virtual residue is used.

    :param bg: The BulgeGraph containing the bulge.
    :param bulge: The name of the bulge.
    :return: The distance between the two points.
    '''
    stem_a, stem_b = list(bg.edges[bulge])[:2]
    side_a, _ = bg.get_sides(stem_a, bulge)
    side_b, _ = bg.get_sides(stem_b, bulge)

    def flanking_point(stem, side):
        # Side 1 corresponds to the last virtual residue of the stem,
        # everything else to the first.
        index = bg.stem_length(stem) - 1 if side == 1 else 0
        axis_pos, direction, _left, _right = bg.v3dposs[stem][index]
        return axis_pos + 7. * direction

    point_a = flanking_point(stem_a, side_a)
    point_b = flanking_point(stem_b, side_b)
    return cuv.vec_distance(point_a, point_b)
""" def get_strand_atom_vrn(bg, s, i): ''' Return the strand and which atom to use for the adjacent nucleotide distance calculation. ''' if i == 0: return (0, 'P', 0) # this might have to just be bg.stem_length(s) if i == 1: return (0, 'O3*', bg.stem_length(s) - 1) if i == 2: return (1, 'P', bg.stem_length(s) - 1) if i == 3: return (1, 'O3*', 0) """
def junction_virtual_atom_distance(bg, bulge):
    '''
    Compute the distance between the O3' atom and the P atom of the two
    residues that flank the junction segment.

    :param bg: The BulgeGraph containing the bulge.
    :param bulge: The name of the bulge
    :return: A single number corresponding to the distance above.
    '''
    stem_a, stem_b = bg.connections(bulge)[:2]
    side_a, _ = bg._get_sides_plus(stem_a, bulge)
    side_b, _ = bg._get_sides_plus(stem_b, bulge)

    nt_a = bg.defines[stem_a][side_a]
    nt_b = bg.defines[stem_b][side_b]
    if bulge[0] == "m":
        assert list(sorted([nt_a, nt_b])) == bg.flanking_nucleotides(bulge)

    # Define positions 0 and 2 are used with the P atom, the other two
    # positions with the O3' atom.
    atom_a = "P" if side_a == 0 or side_a == 2 else "O3'"
    atom_b = "P" if side_b == 0 or side_b == 2 else "O3'"
    assert atom_a != atom_b

    difference = bg.virtual_atoms(nt_a)[atom_a] - bg.virtual_atoms(nt_b)[atom_b]
    return cuv.magnitude(difference)
@profile
def add_virtual_residues(bg, element):
    '''
    Create all of the virtual residues for the given element.

    For stems, this includes the associated bases and inverses.

    .. note::

        This is a low-level function used if only the virtual residues of
        a single element should be added. To add the virtual residues for
        all stems, use `cg.add_all_virtual_residues`

    :param bg: The CoarseGrainRNA bulge graph containing the element
    :param element: The name of the element to be included. Names starting
                    with "s" are treated as stems, all others as loops.
    '''
    is_stem = element[0] == "s"
    handler = _add_stem_virtual_residues if is_stem else _add_loop_virtual_residues
    return handler(bg, element)
def _add_loop_virtual_residues(cg, element):
    '''
    Store, for every residue of a loop element, its position expressed in the
    coordinate system of that element (see `element_coord_system`).

    The position of a residue is the coordinate of its C1' atom. If the
    residue has no C1' atom, the mean of all its atom coordinates is used
    instead and a warning is logged.

    :param cg: The CoarseGrainRNA. Nothing is added if `cg.chains` is empty.
    :param element: The name of the loop element.
    '''
    if not cg.chains:
        log.info(
            "No virtual residues added for %s, because no pdb chain present", element)
        return

    # The coordinate system only depends on the element, so it is computed
    # once, but lazily, so that elements without residues never trigger it.
    coord_system = None
    for i, resid in enumerate(cg.define_residue_num_iterator(element, seq_ids=True)):
        residue = cg.chains[resid.chain][resid.resid]
        try:
            global_coords = residue["C1'"].coord
        except KeyError:
            log.warning("Added virtual residue position for residue %s will be "
                        "inaccurate, because no C1' is present. Atoms are %s",
                        resid, list(residue.child_dict.keys()))
            # Fall back to the centroid of all atoms. The counter must NOT be
            # called `i`: `i` is the residue index used as key below.
            coord_sum = np.zeros(3)
            n_atoms = 0
            for atom in residue:
                coord_sum += atom.coord
                n_atoms += 1
            global_coords = coord_sum / n_atoms
        if coord_system is None:
            coord_system = element_coord_system(cg, element)
        origin, basis = coord_system
        element_coords = ftuv.change_basis(
            global_coords - origin, basis, ftuv.standard_basis)
        cg.vposs[element][i] = element_coords


def _add_stem_virtual_residues(bg, stem):
    '''
    Compute and store the virtual residues of a stem.

    For every base pair index i of the stem, the entries
    `bg.vposs`, `bg.vvecs`, `bg.v3dposs`, `bg.vbases` and `bg.vinvs`
    are filled at `[stem][i]`.

    :param bg: The CoarseGrainRNA containing the stem.
    :param stem: The name of the stem.
    '''
    stem_vec = bg.coords.get_direction(stem)
    twist_vec = bg.get_twists(stem)[0]
    # Reuse the cached inverse as long as the cached basis still matches the
    # current stem direction and first twist vector.
    if (stem in bg.bases
            and np.allclose(stem_vec, bg.bases[stem][0])
            and np.allclose(twist_vec, bg.bases[stem][1])):
        stem_inv = bg.stem_invs[stem]
    else:
        stem_basis = cuv.create_orthonormal_basis(stem_vec, twist_vec)
        stem_inv = nl.inv(stem_basis.transpose())
        bg.bases[stem] = stem_basis
        bg.stem_invs[stem] = stem_inv

    for i in range(bg.stem_length(stem)):
        vpos = virtual_res_3d_pos(bg, stem, i, stem_inv=stem_inv)
        vbasis = virtual_res_basis(bg, stem, i, vec=vpos[1])
        vinv = nl.inv(vbasis.transpose())

        bg.vposs[stem][i] = vpos[0]
        bg.vvecs[stem][i] = vpos[1]
        bg.v3dposs[stem][i] = vpos
        bg.vbases[stem][i] = vbasis
        bg.vinvs[stem][i] = vinv
def stem_vres_reference_atoms(bg, s, i):
    '''
    Calculate the position of each atom in the reference frame of the stem
    and virtual residue.

    :param bg: The BulgeGraph
    :param s: The stem identifier
    :param i: The i'th base-pair in the stem

    :return: (origin, basis, [dict(atoms), dict(atoms)])
             The origin of the coordinate system (vpos),
             the basis built from the stem direction and the virtual
             residue vector, and two dictionaries (one per strand) mapping
             atom names to positions in that coordinate system. Atoms that
             are missing in the residue are left out.
    '''
    vpos, twist, _, _ = virtual_res_3d_pos(bg, s, i)

    stem_direction = bg.coords[s][1] - bg.coords[s][0]
    basis = cuv.create_orthonormal_basis(stem_direction, twist)

    residue_ids = bg.get_resseqs(s, seq_ids=True)
    coords = [dict(), dict()]

    for strand in (0, 1):
        # The first strand is indexed from its start, the second strand
        # from its end, so that both residues belong to base pair i.
        if strand == 0:
            res_id = residue_ids[0][i]
        else:
            res_id = residue_ids[1][-(1 + i)]
        res = bg.chains[res_id.chain][res_id.resid]

        for atom in ftup.all_rna_atoms:
            try:
                c = res[atom].coord
            except KeyError:
                # This residue does not have that atom.
                continue
            new_c = cuv.change_basis(c - vpos, basis, cuv.standard_basis)
            log.debug("Atom %s has coords %s", atom, new_c)
            coords[strand][atom] = new_c

    return (vpos, basis, coords)
def bounding_boxes(bg, s, i):
    '''
    Return the bounding boxes of the two nucleotides at the
    i'th position on the stem.

    :param bg: A BulgeGraph where bg.chains is not None
    :param s: The stem identifier
    :param i: The i'th base-pair in the stem

    :return: (origin, basis, [(c1, c2), (c1, c2)]) The origin and basis of
             the coordinate system and, per strand, the minimum and maximum
             corner of the box around all atoms of the nucleotide.
    '''
    vpos, basis, atoms = stem_vres_reference_atoms(bg, s, i)
    corners = []

    for k in (0, 1):
        positions = list(atoms[k].values())
        # The +/-10000 sentinels are the starting values of the search and
        # are returned unchanged if no coordinate is beyond them.
        lower = [min([10000.] + [pos[axis] for pos in positions])
                 for axis in range(3)]
        upper = [max([-10000.] + [pos[axis] for pos in positions])
                 for axis in range(3)]
        corners.append((lower, upper))

    return (vpos, basis, corners)
def virtual_residue_atoms(bg, s, i, strand=0):
    '''
    Return the atoms for the virtual residue.

    :param bg: The BulgeGraph
    :param s: The stem
    :param i: The virtual residue number
    :param strand: The strand for which to get the virtual atoms (0 or 1)
    :raises ValueError: If s is not the name of a stem.
    :return: The result of `bg.virtual_atoms` for the selected nucleotide.
    '''
    if s[0] != "s":
        raise ValueError(
            "Expected stem (not single-stranded RNA element), got {}".format(s))
    define = bg.defines[s]
    # Strand 0 is counted upwards from the first define entry, strand 1
    # downwards from the last one.
    residue_numbers = (define[0] + i, define[3] - i)
    return bg.virtual_atoms(residue_numbers[strand])
def calc_R(xc, yc, p):
    """
    Calculate the distance of each 2D point from the center (xc, yc).

    :param xc: x coordinate of the center
    :param yc: y coordinate of the center
    :param p: Array whose first two columns are the x and y coordinates
    :return: An array with one distance per row of p
    """
    dx = p[:, 0] - xc
    dy = p[:, 1] - yc
    return np.sqrt(dx ** 2 + dy ** 2)
def f_2(c, p):
    """
    Calculate the algebraic distance between the data points and the mean
    circle centered at c=(xc, yc).

    :param c: The center (xc, yc) of the circle
    :param p: The 2D points
    :return: Per point, its distance from c minus the mean distance
    """
    radii = calc_R(*c, p=p)
    mean_radius = radii.mean()
    return radii - mean_radius
def circle_fit(p):
    """
    Algebraic fit of a circle center to 2D points.

    :param p: Array whose first two columns are the x and y coordinates
    :return: (xc, yc), the fitted center
    """
    xs, ys = p[:, 0], p[:, 1]
    mean_x, mean_y = np.mean(xs), np.mean(ys)

    # Reduced coordinates, relative to the mean of the points.
    u = xs - mean_x
    v = ys - mean_y

    # The center (uc, vc) in reduced coordinates solves
    #    Suu * uc +  Suv * vc = (Suuu + Suvv)/2
    #    Suv * uc +  Svv * vc = (Suuv + Svvv)/2
    s_uv = sum(u * v)
    s_uu = sum(u ** 2)
    s_vv = sum(v ** 2)
    s_uuv = sum(u ** 2 * v)
    s_uvv = sum(u * v ** 2)
    s_uuu = sum(u ** 3)
    s_vvv = sum(v ** 3)

    lhs = np.array([[s_uu, s_uv], [s_uv, s_vv]])
    rhs = np.array([s_uuu + s_uvv, s_vvv + s_uuv]) / 2.0
    center_u, center_v = nl.solve(lhs, rhs)

    # Shift back from reduced to original coordinates.
    return (mean_x + center_u, mean_y + center_v)
def circle_error(c, p):
    """
    Sum of the squared residuals of `f_2` for the circle center c.

    :param c: The center (xc, yc) of the circle
    :param p: The 2D points
    :return: The sum of squared algebraic distances
    """
    residuals = f_2(c, p)
    return sum(residual ** 2 for residual in residuals)
def f_3(vec, points, est):
    """
    Calculate the residuals of the optimal circle for the points projected
    onto the plane orthogonal to vec.

    :param vec: The normal of the plane the points are projected onto
    :param points: The 3D points, one per row
    :param est: Not used by this function.
    :return: The result of `f_2` for the fitted center and projected points
    """
    plane_basis = cuv.create_orthonormal_basis(vec)
    in_basis = cuv.change_basis(points.T, plane_basis, cuv.standard_basis).T
    # Drop the first component (the one along vec) to get the projection.
    projected = in_basis[:, 1:]
    center = circle_fit(projected)
    return f_2(center, projected)
def fit_circle(mids, points, start_pos, end_pos):
    '''
    Find the axis for which the projected points fit a circle best and
    return two points on that axis.

    :param mids: Two points; their difference is the initial guess of the axis
    :param points: The 3D points, one per row
    :param start_pos: Position that defines the coordinate of the first
                      returned point along the axis
    :param end_pos: Position that defines the coordinate of the second
                    returned point along the axis
    :return: An array with the two axis points in the standard basis
    '''
    initial_axis = mids[1] - mids[0]
    with warnings.catch_warnings():
        # Warnings raised during the optimization are silenced.
        warnings.simplefilter("ignore")
        v1, ier = so.leastsq(f_3, initial_axis,
                             args=(points, mids[0][1:]))

    axis_basis = cuv.create_orthonormal_basis(v1)

    points_local = cuv.change_basis(points.T, axis_basis, cuv.standard_basis).T
    start_local = cuv.change_basis(start_pos, axis_basis, cuv.standard_basis)
    end_local = cuv.change_basis(end_pos, axis_basis, cuv.standard_basis)

    center = circle_fit(points_local[:, 1:])

    # Both returned points share the fitted circle center and differ only
    # in their coordinate along the axis.
    mids_local = np.array([[start_local[0], center[0], center[1]],
                           [end_local[0], center[0], center[1]]])
    return cuv.change_basis(mids_local.T, cuv.standard_basis, axis_basis).T
def extract_define_residues(define, chain):
    '''
    Extract the residues in the define and return them as a new chain.

    :param define: A flat sequence of (start, end) pairs; both ends are
                   inclusive.
    :param chain: The chain from which the residues are taken
    :return: A new chain with id ' ' containing the selected residues
    '''
    new_chain = bpdb.Chain.Chain(' ')
    # Consuming one iterator twice per step yields consecutive pairs.
    bounds = iter(define)
    for first, last in zip(bounds, bounds):
        for resnum in range(first, last + 1):
            new_chain.add(chain[resnum])
    return new_chain
def add_stem_information_from_pdb_chains(cg):
    '''
    Get the 3D information of the stems from the PDB chains.

    For every stem, the coordinates and twist vectors returned by
    `stem_from_chains` are stored in `cg.coords` and `cg.twists`.

    :param cg: The CoarseGrainRNA. Its `chains` attribute provides the
               Bio.PDB chains of the 3D structure.
    '''
    renamed_chains = {}
    for name, chain in cg.chains.items():
        renamed_chains[name] = ftup.rename_rosetta_atoms(chain)

    for elem in cg.defines.keys():
        if elem[0] != 's':
            continue
        coords, twists = stem_from_chains(cg, renamed_chains, elem)
        cg.coords[elem] = coords
        axis = cg.coords[elem][1] - cg.coords[elem][0]
        cg.twists[elem] = twists

        # Both twist vectors have to be perpendicular to the stem axis.
        assert abs(np.dot(axis, twists[0])) < 10**-10
        assert abs(np.dot(axis, twists[1])) < 10**-10
def get_incomplete_elements(cg):
    """
    Get an estimated list of cg-elements which have missing residues in the PDB.

    One of many problems with PDB data are residues, for which no
    coordinates could be determined experimentally. An element is reported
    if, for any of its ranges, the sequence differs from the sequence with
    missing residues. For non-stem elements the adjacent nucleotides are
    included in the range.

    :param cg: The CoarseGrainRNA
    :return: A set of element names
    """
    def _has_gap(elem):
        ranges = cg.define_range_iterator(elem, adjacent=elem[0] != "s")
        return any(cg.seq[r[0]:r[1]] != cg.seq.with_missing[r[0]:r[1]]
                   for r in ranges)

    return {elem for elem in cg.defines if _has_gap(elem)}
def add_loop_information_from_pdb_chains(bg):
    '''
    Set the coordinates of all hairpin, five-prime and three-prime elements.

    The start point is the end of the neighboring stem. If the element has
    no neighbor, it is the position of the first reference atom found in
    the chain. The end point is the result of `get_furthest_c_alpha`, or
    the start point if that returns None.

    :param bg: The CoarseGrainRNA. All residues of an element have to be
               on a single chain.
    :raises CgConstructionError: If an element without neighbors is on a
                                 chain that contains no reference atom.
    '''
    for d in it.chain(bg.hloop_iterator(), bg.floop_iterator(), bg.tloop_iterator()):
        assert d in bg.defines

        chain_ids = set(x.chain for y in bg.get_resseqs(d) for x in y)
        assert len(chain_ids) == 1
        c, = chain_ids
        chain = bg.chains[c]

        edges = list(bg.edges[d])
        if not edges:
            # Odd case where there are no stems in the structure.
            # We use the furthest distance from the first residue that
            # has the reference atom.
            log.info(
                "add_loop_information_from_pdb_chain: {} has no neighbor".format(d))
            first_res = None
            last_seen = None  # stays None if the chain has no residues at all
            for last_seen in chain.get_residues():
                if REFERENCE_CATOM in last_seen:
                    first_res = last_seen
                    break
            if first_res is None:
                e = CgConstructionError("The PDB chain does not contain any {} atom (despite containing {} residues).".format(
                    REFERENCE_CATOM, len(list(chain.get_residues()))))
                with log_to_exception(log, e):
                    if last_seen is None:
                        # Without this guard, an empty chain would raise an
                        # UnboundLocalError instead of the intended error.
                        log.error("The chain does not contain any residues.")
                    else:
                        log.error(
                            "The chain's last residue only has the following atoms: %s", last_seen.child_list)
                raise e
            start_point = first_res[REFERENCE_CATOM].get_vector().get_array()
            centroid = get_furthest_c_alpha(bg, chain, start_point, d)
        else:
            s1 = edges[0]
            (s1b, _) = bg.get_sides(s1, d)
            # The loop starts at the end of the stem it is attached to.
            start_point = bg.coords[s1][s1b]
            centroid = get_furthest_c_alpha(bg, chain, start_point, d)

            if centroid is None:
                print("No end found for loop %s... using the end of stem %s" %
                      (d, s1), file=sys.stderr)
                centroid = start_point

        assert start_point is not None
        assert centroid is not None
        bg.coords[d] = (start_point, centroid)
def _add_loop_vres(cg):
    '''
    Add the virtual residues of all non-stem elements, on a best-effort
    basis: elements for which this fails are skipped with a warning.

    :param cg: The CoarseGrainRNA
    '''
    if len(cg.defines) < 2:
        return  # five-prime-only cgs have no twists
    log.debug("Adding virtual residues")
    for elem in cg.defines:
        if elem[0] == "s":
            continue
        try:
            add_virtual_residues(cg, elem)
        except Exception:
            # Deliberately best-effort, but KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            log.warning("Could not add virtual residues from PDB for %s, elem %s",
                        cg.name, elem)
            log.debug("Reason for the missing virtual residues of %s:",
                      elem, exc_info=True)
def cylinder_works(cg, cylinders_to_stems, tv, c, r=4.):
    '''
    Check if the end points of element tv and of all elements already in
    cylinder c are within distance r of a common line.

    The line passes through the mean of the points, along the first
    right-singular vector of the centered points.

    :param cg: The CoarseGrainRNA
    :param cylinders_to_stems: Maps cylinder ids to lists of element names
    :param tv: The name of the element to be tested
    :param c: The id of the cylinder
    :param r: The maximal allowed distance from the line
    :return: False if any point is further than r from the line, else True
    '''
    endpoints = [cg.coords[tv][0], cg.coords[tv][1]]
    for member in cylinders_to_stems[c]:
        endpoints.extend((cg.coords[member][0], cg.coords[member][1]))

    data = np.array(endpoints)
    centroid = data.mean(axis=0)

    _, _, vt = np.linalg.svd(data - centroid)
    axis = vt[0]

    # Remove the component along the axis; what remains is the offset of
    # each point perpendicular to the line.
    offsets = centroid - data
    perpendicular = offsets - np.dot(offsets, axis)[:, np.newaxis] * axis  # pylint: disable=invalid-sequence-index
    distances = [ftuv.magnitude(vec) for vec in perpendicular]

    return not max(distances) > r
def get_encompassing_cylinders(cg, radius=6.):
    '''
    Group stems, multiloops and interior loops into cylinders.

    The graph is traversed breadth-first from a randomly chosen element.
    Each visited stem, multiloop or interior loop is added to the first
    cylinder of a neighbor that still passes `cylinder_works`; cylinders
    with a larger total stem length are tried first. If none fits, a new
    cylinder is created.

    :param cg: The CoarseGrainRNA
    :param radius: The radius passed to `cylinder_works`
    :return: A defaultdict mapping cylinder ids (starting at 1) to lists
             of element names
    '''
    visited = set()

    # Indexed by element name, contains the id of the cylinder it is part of.
    stems_to_cylinders = dict()
    cylinders_to_stems = col.defaultdict(list)

    # A deque gives O(1) removal from the front of the queue.
    to_visit = col.deque([random.choice(list(cg.defines.keys()))])
    cylinder_counter = 0

    def _negative_stem_length(cylinder):
        return -sum([cg.stem_length(k) for k in cylinders_to_stems[cylinder]])

    while to_visit:
        tv = to_visit.popleft()

        if tv in visited:
            continue
        visited.add(tv)
        to_visit.extend(cg.edges[tv])

        # Only stems, multiloops and interior loops are put into cylinders.
        if tv[0] != 's' and tv[0] != 'm' and tv[0] != 'i':
            continue

        # Only the cylinders of direct neighbors have to be checked.
        cylinders_to_check = set()
        for e in cg.edges[tv]:
            if e in stems_to_cylinders:
                cylinders_to_check.add(stems_to_cylinders[e])

        for c in sorted(cylinders_to_check, key=_negative_stem_length):
            # The new node will definitely be at the end of the cylinder.
            if cylinder_works(cg, cylinders_to_stems, tv, c, radius):
                cylinders_to_stems[c].append(tv)
                stems_to_cylinders[tv] = c
                break
        else:
            # No appropriately sized cylinder has been found, so we
            # create a new one containing just this element.
            cylinder_counter += 1
            cylinders_to_stems[cylinder_counter].append(tv)
            stems_to_cylinders[tv] = cylinder_counter

    return cylinders_to_stems
def element_coord_system(cg, d):
    '''
    Get a coordinate system for a particular coarse grain element.

    If an element has an axis vector, a, and twist vectors t1 and t2, then
    the basis is created from the normalized axis a and the normalized sum
    of the two twist vectors. The origin is the midpoint of the element.

    :param cg: The CoarseGrainRNA
    :param d: The name of the element
    :return: (origin, basis)
    '''
    axis = ftuv.normalize(cg.coords[d][1] - cg.coords[d][0])
    twists = cg.get_twists(d)
    mean_twist = ftuv.normalize(twists[0] + twists[1])

    # Both twists have to be perpendicular to the axis.
    first_dot = abs(np.dot(axis, twists[0]))
    assert first_dot < 10**-10, "{}: {}".format(d, abs(np.dot(axis, twists[0])))
    assert abs(np.dot(axis, twists[1])) < 10**-10

    origin = (cg.coords[d][0] + cg.coords[d][1]) / 2.
    basis = ftuv.create_orthonormal_basis(axis, mean_twist)
    return (origin, basis)
def virtual_atoms(cg, given_atom_names=None, sidechain=True):
    '''
    Get a lookup object for the virtual atoms of this structure.

    :param cg: The coarse grain structure.
    :param given_atom_names: Passed on to `VirtualAtomsLookup`
    :param sidechain: Passed on to `VirtualAtomsLookup`
    :return: A `VirtualAtomsLookup` instance
    '''
    lookup = VirtualAtomsLookup(cg, given_atom_names, sidechain)
    return lookup
# Module-level cache for the average atom positions of non-stem elements.
# It is filled on the first lookup that needs it.
_average_atom_positions = None


class VirtualAtomsLookup(object):
    """
    An object with a dict-like interface that calculates the virtual atom
    positions on demand.
    """

    def __init__(self, cg, given_atom_names=None, sidechain=True):
        """
        :param cg: The coarse grain structure, for which the virtual
                   atoms are generated.
        :param given_atom_names: If not None, only these atom names are
                                 looked up.
        :param sidechain: Whether side chain atoms are included. Only used
                          if given_atom_names is None.

        ..note ::
            If cg is modified, new virtual atom positions are calculated.
        """
        self.cg = cg
        self.given_atom_names = given_atom_names
        self.sidechain = sidechain

    def __getitem__(self, position):
        """
        :returns: A dictionary containing all atoms (as keys) and their
                  positions (as values) for the given residue.
        :param position: The position of the residue in the RNA (starting with 1)
        :raises AssertionError: If the position is not part of any element.
        """
        # Find the element which contains the position.
        for key, value in self.cg.defines.items():
            if len(value) < 2:
                continue  # For multiloops of length 0, value is []
            elif position >= value[0] and position <= value[1]:
                return self._getitem_for_element(key, position)
            elif len(value) == 4 and position >= value[2] and position <= value[3]:
                return self._getitem_for_element(key, position)
        # Raised explicitly, so it is not stripped when running with -O.
        raise AssertionError("No return for pos {}".format(position))

    def keys(self):
        """
        :returns: The set of all residue positions covered by the defines.
        """
        k = set()
        for value in self.cg.defines.values():
            if len(value) > 1:
                for i in range(value[0], value[1] + 1):
                    k.add(i)
            if len(value) > 3:
                for i in range(value[2], value[3] + 1):
                    k.add(i)
        return k

    def _getitem_for_element(self, d, pos):
        """
        :returns: A dictionary containing all atoms (as keys) and their
                  positions (as values) for the given residue. Atoms
                  without a stored average position are left out.
        :param d: The coarse grained element (e.g. "s1")
        :param pos: The position of the residue. It has to be in the element d!
        """
        global _average_atom_positions
        if d[0] == "s":
            # Use virtual residues for stems.
            return self._getitem_for_stem(d, pos)

        if _average_atom_positions is None:
            log.info("LOADING AV_ATOM_POS")
            import pkgutil
            data = pkgutil.get_data(
                'forgi', 'threedee/data/average_atom_positions.json')
            _average_atom_positions = json.loads(data.decode("ascii"))
        log.debug("Using loaded av_atom_pos")

        e_coords = dict()
        try:
            origin, basis = element_coord_system(self.cg, d)
        except ValueError:
            # 0-length hairpin.
            if d[0] == "h" and np.array_equal(self.cg.coords[d][0], self.cg.coords[d][1]):
                warnings.warn(
                    "Returning empty set of virtual atoms for 0-length hairpin")
                return e_coords
            raise

        if d[0] == 'i' or d[0] == 'm':
            conn = self.cg.connections(d)
            conn_type = self.cg.connection_type(d, conn)
        else:
            conn_type = 0

        for i, r in enumerate(self.cg.define_residue_num_iterator(d)):
            if r != pos:
                continue
            if self.given_atom_names is None:
                if self.sidechain:
                    # Seq is now 1-based
                    atom_names = (ftup.nonsidechain_atoms + [
                        self.cg.seq[r] + "." + x for x in ftup.side_chain_atoms[self.cg.seq[r]]])
                else:
                    atom_names = ftup.nonsidechain_atoms
            else:
                atom_names = self.given_atom_names

            # The dimensions are the same for all atoms of the residue.
            dimensions = " ".join(map(str, self.cg.get_node_dimensions(d)))
            for aname in atom_names:
                identifier = "%s %s %d %d %s" % (
                    d[0], dimensions, conn_type, i, aname)
                # Side chain atoms are stored as "<residue>.<atom>", but
                # returned under the plain atom name.
                if "." in aname:
                    _, _, aname = aname.partition(".")
                try:
                    local_position = _average_atom_positions[identifier]
                except KeyError:
                    # No coordinates stored for this atom: best-effort, skip.
                    continue
                e_coords[aname] = origin + ftuv.change_basis(
                    np.array(local_position), ftuv.standard_basis, basis)
        return e_coords

    def _getitem_for_stem(self, d, pos):
        """
        :returns: A dictionary containing all atoms (as keys) and their
                  positions (as values) for the given residue of a stem.
        :param d: The name of the stem
        :param pos: The position of the residue. It has to be in the stem d!
        """
        log.debug("getitem_for_stem %s, pos %s", d, pos)
        pos_in_stem, side = self.cg.stem_resn_to_stem_vres_side(d, pos)
        assert pos >= 1
        try:
            residue = (self.cg.seq[pos])
        except IndexError as e:
            with log_to_exception(log, e):
                log.error("position {} not in sequence {}".format(
                    pos - 1, self.cg.seq))
            raise

        if self.given_atom_names is None:
            if self.sidechain:
                atom_names = (ftup.nonsidechain_atoms +
                              ftup.side_chain_atoms[residue])
            else:
                atom_names = ftup.nonsidechain_atoms
        else:
            atom_names = self.given_atom_names

        atom_keys = []
        atom_coords = []
        for aname in atom_names:
            # The stored positions use "'" where atom names may use "*".
            if aname[-1] == "*":
                aname_dash = aname[:-1] + "'"
            else:
                aname_dash = aname
            spos = ftus.avg_stem_vres_atom_coords[side][residue][aname_dash]
            atom_keys.append(aname)
            atom_coords.append(spos)

        try:
            vres_basis = self.cg.vbases[d][pos_in_stem]
            vres_pos = self.cg.vposs[d][pos_in_stem]
        except KeyError:
            # The virtual residues have not been calculated yet.
            self.cg.add_all_virtual_residues()
            vres_basis = self.cg.vbases[d][pos_in_stem]
            vres_pos = self.cg.vposs[d][pos_in_stem]

        # Transform all atoms of the residue at once.
        atom_coords = ftuv.change_basis_vectorized(
            np.array(atom_coords), ftuv.standard_basis, vres_basis) + vres_pos
        return dict(zip(atom_keys, atom_coords))
def vres_to_global_coordinates(vres_pos, vres_basis, positions):
    '''
    Convert positions given in the coordinate system of a virtual residue
    to global coordinates.

    :param vres_pos: The origin of the virtual residue coordinate system
    :param vres_basis: The basis of the virtual residue coordinate system
    :param positions: A dictionary mapping keys to positions in the
                      virtual residue coordinate system
    :return: A new dictionary with the same keys and global positions
    '''
    return {
        key: ftuv.change_basis(v_pos, ftuv.standard_basis, vres_basis) + vres_pos
        for key, v_pos in positions.items()
    }
def element_distance(cg, l1, l2):
    '''
    Calculate the distance between the two closest points of these two
    elements, each represented by the line segment between its coordinates.

    :param cg: The CoarseGrainRNA
    :param l1: The name of the first element
    :param l2: The name of the second element
    :return: The distance
    '''
    first = cg.coords[l1]
    second = cg.coords[l2]
    closest_on_first, closest_on_second = ftuv.line_segment_distance(
        first[0], first[1], second[0], second[1])
    return ftuv.vec_distance(closest_on_first, closest_on_second)
def get_basepair_center(cg, pos):
    """
    The center of a basepair, as defined in doi: 10.1261/rna.305307

    It is the mean position of the virtual atoms C1' and C8 (for A and G)
    or C1' and C6 (for U and C) of both nucleotides.

    :param cg: The CoarseGrainRNA
    :param pos: The number of one of the two pairing bases
    :return: The center as array of length 3
    """
    pos2 = cg.pairing_partner(pos)
    # NOTE(review): VirtualAtomsLookup in this file indexes the sequence as
    # cg.seq[pos] ("Seq is now 1-based") - confirm that pos - 1 is intended.
    seq1 = cg.seq[pos - 1]
    seq2 = cg.seq[pos2 - 1]
    atoms = {"A": ["C1'", "C8"],
             "G": ["C1'", "C8"],
             "U": ["C1'", "C6"],
             "C": ["C1'", "C6"]}
    va1 = cg.virtual_atoms(pos)
    va2 = cg.virtual_atoms(pos2)

    total = np.zeros(3)
    try:
        for nucleotide, virtual in ((seq1, va1), (seq2, va2)):
            for atom in atoms[nucleotide]:
                total += virtual[atom]
    except KeyError:
        log.error("%s\n%s", va1.keys(), va2.keys())
        raise
    n_atoms = len(atoms[seq1]) + len(atoms[seq2])
    total /= n_atoms
    return total
def get_basepair_plane(cg, pos):
    """
    The plane of the basepair, as defined in figure 13 of doi: 10.1261/rna.305307

    For canonical and G-U basepairs the normal is accumulated from cross
    products of vectors between hydrogen-bonded virtual atoms and returned
    normalized. For other basepairs a warning is issued and the (not
    normalized) vector between the coordinates of the stem is returned.

    :param cg: The CoarseGrainRNA
    :param pos: The number of one of the two pairing bases
    :return: A vector normal to the basepair plane
    """
    pos2 = cg.pairing_partner(pos)
    # NOTE(review): VirtualAtomsLookup in this file indexes the sequence as
    # cg.seq[pos] ("Seq is now 1-based") - confirm that pos - 1 is intended.
    seq1 = cg.seq[pos - 1]
    seq2 = cg.seq[pos2 - 1]
    va1 = cg.virtual_atoms(pos)
    va2 = cg.virtual_atoms(pos2)

    # Per basepair, the atom pairs (atom of first base, atom of second base).
    h_bonds = {"U": {"A": [("O4", "N6"), ("N3", "N1")],
                     "G": [("N3", "O6"), ("O2", "N1")]},
               "A": {"U": [("N6", "O4"), ("N1", "N3")]},
               "G": {"U": [("O6", "N3"), ("N1", "O2")],
                     "C": [("O6", "N4"), ("N1", "N3"), ("N2", "O2")]},
               "C": {"G": [("N4", "O6"), ("N3", "N1"), ("O2", "N2")]}
               }
    try:
        hb = h_bonds[seq1][seq2]
    except KeyError:
        # Non-canonical basepair
        warnings.warn("Estimating plane from stem vector for "
                      " non-canonical basepair {}-{} at positions"
                      " {},{}".format(seq1, seq2, pos, pos2))
        # ValueError, if cg.pairing_partner is buggy
        stem, = cg.nucleotides_to_elements([pos, pos2])
        return cg.coords[stem][0] - cg.coords[stem][1]

    plane = np.zeros(3)
    for l1, l2 in it.combinations(hb, 2):
        left_1 = va1[l1[0]]
        left_2 = va1[l2[0]]
        right_1 = va2[l1[1]]
        right_2 = va2[l2[1]]

        vector_pairs = (
            (right_1 - left_1, right_2 - left_1),
            (right_1 - left_1, left_2 - right_1),
            (right_2 - left_2, right_2 - left_1),
            (right_2 - left_2, left_2 - right_1),
        )
        for index, (first, second) in enumerate(vector_pairs):
            add = np.cross(first, second)
            # Only the first contribution of each combination is exempt
            # from the check while nothing has been accumulated yet.
            if index > 0 or np.any(plane != np.zeros(3)):
                assert ftuv.vec_angle(add, plane) < math.radians(15), ("{}-{}: {}, {}: {}"
                                                                       " degrees".format(seq1, seq2, plane, add, math.degrees(ftuv.vec_angle(add, plane))))
            plane += add
    return ftuv.normalize(plane)

Contents