#!/usr/bin/python
from __future__ import print_function
from __future__ import division
from builtins import map
from builtins import range
from builtins import object
from builtins import zip, str
import itertools as it
import collections as col
import os.path as op
import warnings
import random
import sys
import math
import json
import operator
from pprint import pprint
import logging
import uuid
import numpy as np
import numpy.linalg as nl
import numpy.testing as nptest
import scipy.optimize as so
import Bio.PDB as bp
import Bio.PDB as bpdb
import Bio.PDB.PDBExceptions
from logging_exceptions import log_to_exception
import forgi.threedee.utilities.average_stem_vres_atom_positions as ftus
import forgi.utilities.debug as fud
import forgi.threedee.utilities.my_math as ftum
import forgi.threedee.utilities.pdb as ftup
import forgi.threedee.utilities.vector as cuv
import forgi.threedee.utilities.vector as ftuv
import forgi
from forgi.threedee.utilities.modified_res import change_residue_id
from forgi.utilities.exceptions import CgConstructionError
from forgi.threedee.utilities.pdb import AtomName
log = logging.getLogger(__name__)
REFERENCE_CATOM = AtomName("C1'")
try:
profile # The @profile decorator from line_profiler (kernprof)
except:
[docs] def profile(x):
return x
[docs]def stem_stem_orientation(cg, s1, s2):
'''
Calculate the orientation of stem s2 in relation to stem s1
as described by 3 parameters:
1. The distance between the closest points of the two stems.
2. The angle between s1 and s2 in the plane formed by the axis of
the first stem and the vector between the two points closest
to each on both stems.
3. The angle of s2 out of the plane formed by their axes.
:param bg: The BulgeGraph containing the stems.
:param s1: The name of the first stem
:param s2: The name of the second stem
:return: (x,y,z) where x,y and z are the parameters described in
the description above.
'''
# shorten the names a little bit
s1_p0 = cg.coords[s1][0]
s1_p1 = cg.coords[s1][1]
s2_p0 = cg.coords[s2][0]
s2_p1 = cg.coords[s2][1]
# The vectors of the axes of the cylinders
s1_vec = cg.coords[s1][1] - cg.coords[s1][0]
s2_vec = cg.coords[s2][1] - cg.coords[s2][0]
# the minimum distance between the two stems, which are represented
# as line segments
(i1, i2) = cuv.line_segment_distance(s1_p0, s1_p1, s2_p0, s2_p1)
i_vec = i2 - i1
i_rej = cuv.vector_rejection(i_vec, s1_vec)
plane_vec = np.cross(i_rej, s1_vec)
# s2_proj is in the intersection plane
s2_proj_in = cuv.vector_rejection(s2_vec, plane_vec)
# s2 proj_out is out of the intersection plane
s2_proj_out = cuv.vector_rejection(s2_vec, i_rej)
# the normal of the plane defined by the two stem vectors
#ang1 = cuv.vec_angle(s1_vec, s2_proj_out)
#ang2 = cuv.vec_angle(s1_vec, s2_proj_in)
ang1 = cuv.vec_angle(s2_proj_in, s1_vec)
ang2 = cuv.vec_angle(s2_proj_out, s1_vec)
#ang3 = cuv.vec_angle(s1_vec, s2_proj_in)
#ang4 = cuv.vec_angle(s1_vec, s2_proj_out)
# ever so slightly increased to prevent domain errors
# in the lateral_offset calculation below
dist = cuv.magnitude(i_vec) + 0.0001
ortho_offset = cuv.magnitude(i_rej)
lateral_offset = math.sqrt(dist * dist - ortho_offset * ortho_offset)
return (cuv.magnitude(i_vec), ang1,
ang2, cuv.vec_angle(s1_vec, s2_vec), lateral_offset, ortho_offset)
[docs]def base_normals(pdb_filename):
'''
Return a list of the normals for each base in the structure.
As defined by the average of the cross products between the C2-C5
and C2-C6 vectors and the N3-C6 and N3-C5 vectors. The origin of
the vector will be the centroid of these four atoms.
:param pdb_filename: The name of the pdb file containing the structure
:return: A list of pairs containing the origin the normal as well as the
normal itself.
'''
with warnings.catch_warnings():
warnings.simplefilter("ignore")
struct = bp.PDBParser().get_structure('t', pdb_filename)
chain = list(struct.get_chains())[0]
origin_norms = []
for res in chain:
c2 = res['C2'].get_vector().get_array()
c5 = res['C5'].get_vector().get_array()
c6 = res['C6'].get_vector().get_array()
n3 = res['N3'].get_vector().get_array()
v1 = cuv.normalize(np.cross(c6 - c2, c5 - c2))
v2 = cuv.normalize(np.cross(c6 - n3, c5 - n3))
# take the average of the two, for accuracy or something
v_norm = (v1 + v2) / 2.
origin = (c2 + c5 + c6 + n3) / 4.
origin_norms += [(origin, v_norm)]
return origin_norms
[docs]def get_twist_angle(coords, twists):
'''
Get the angle of the twists with respect to each other.
:param coords: The coordinates of the ends of the stem.
:param twists: The two twist vectors.
:return angle: The angle between the two twist vectors.
'''
stem_vec = coords[1] - coords[0]
basis = cuv.create_orthonormal_basis(stem_vec, twists[0])
twist2 = cuv.change_basis(twists[1], basis, cuv.standard_basis)
#assert_allclose(twist2[0], 0., rtol=1e-7, atol=1e-7)
angle = math.atan2(twist2[2], twist2[1])
return angle
[docs]def twist2_from_twist1(stem_vec, twist1, angle):
'''
Get an orientation for the second twist which will place it an
angle of angle from the first twist.
:param stem_vec: The vector of the stem.
:param twist1: The vector of the first twist.
:param angle: The angular difference between the two twists.
'''
basis = cuv.create_orthonormal_basis(stem_vec, twist1)
twist2_new = np.array([0., math.cos(angle), math.sin(angle)])
twist2 = np.dot(basis.transpose(), twist2_new)
#twist2 = cuv.change_basis(twist2_new, cuv.standard_basis, basis)
return twist2
[docs]def get_twist_parameter(twist1, twist2, u_v):
'''
Calculate how much stem1 must be twisted for its twist vector
to coincide with that of stem2.
:param twist1: The twist notator of stem1
:param twist2: The twist notator of stem2
:param u_v: The parameters u and v for rotating stem2 onto stem1
'''
u, v = u_v
rot_mat1 = cuv.rotation_matrix("z", v)
rot_mat2 = cuv.rotation_matrix("y", u - math.pi / 2.)
twist2_new = np.dot(rot_mat1, twist2)
twist2_new = np.dot(rot_mat2, twist2_new)
# print "get_twist_parameter twist2:", twist2_new
return math.atan2(twist2_new[2], twist2_new[1])
[docs]def get_stem_orientation_parameters(stem1_vec, twist1, stem2_vec, twist2):
'''
Return a parameterization of the orientation of stem2 with respect to
stem1.
stem1 -> bulge -> stem2
:param stem1_vec: The vector representing the axis of stem1
:param twist1: The twist of stem1 closest to the bulge
:param stem2_vec: The vector representing the axis of stem2
:returns: (r,u,v,t) where r,u,v = the stem orientation in polar coordinates
and t is the twist parameter.
'''
# Since we will denote the orientation of stem2 with respect to stem1
# We first need to define a new coordinate system based on stem1
stem1_basis = cuv.create_orthonormal_basis(stem1_vec, twist1)
log.debug("Stem1 basis \n%s", stem1_basis)
# Transform the vector of stem2 to the new coordinate system
stem2_new_basis = cuv.change_basis(stem2_vec, stem1_basis,
cuv.standard_basis)
log.debug("Stem2 in basis of stem 1 %s", stem2_new_basis)
twist2_new_basis = cuv.change_basis(twist2, stem1_basis,
cuv.standard_basis)
# Convert the cartesian coordinates to polar coordinates
(r, u, v) = cuv.spherical_cartesian_to_polar(stem2_new_basis)
t = get_twist_parameter(twist1, twist2_new_basis, (u, v))
log.debug("r %s, u %s, v %s, t %s", r, u, v, t)
return (r, u, v, t)
[docs]def get_stem_separation_parameters(stem, twist, bulge):
'''
Parameterize the location of the bulge with respect to the stem.
:param stem: The stem vector.
:param bulge: the bulge vector.
'''
stem_basis = cuv.create_orthonormal_basis(stem, twist)
bulge_new_basis = cuv.change_basis(bulge, stem_basis, cuv.standard_basis)
return cuv.spherical_cartesian_to_polar(bulge_new_basis)
[docs]def get_stem_twist_and_bulge_vecs(bg, bulge, connections):
'''
Return the vectors of the stems and of the twists between which
we want to calculate angles.
The two vectors will be defined as follows:
s1e -> s1b -> b -> s2b -> s2e
The twists will be the two closest to the bulge.
:param bulge: The name of the bulge separating the two helices.
:param connections: The two stems that are connected to this bulge.
:return: (stem1, twist1, stem2, twist2, bulge)
'''
s1 = connections[0]
s2 = connections[1]
#s1d = bg.defines[s1]
#s2d = bg.defines[s2]
mids1 = bg.coords[s1]
twists1 = bg.twists[s1]
mids2 = bg.coords[s2]
twists2 = bg.twists[s2]
# find out which sides of the stems are closest to the bulge
# the results will be indexes into the mids array
(s1b, s1e) = bg.get_sides(s1, bulge)
log.debug("Side of 1st stem %s attached to %s is %s ", s1, bulge, s1b)
(s2b, s2e) = bg.get_sides(s2, bulge)
log.debug("Side of 2nd stem %s attached to %s is %s ", s2, bulge, s2b)
# Create directional vectors for the stems
# For ML: For IL: -> -> ->
# | A
# V |
# * -> *
stem1_vec = mids1[s1b] - mids1[s1e]
bulge_vec = mids2[s2b] - mids1[s1b]
stem2_vec = mids2[s2e] - mids2[s2b]
#twists1_vec = [twists1[s1b], twists1[s1e]]
#twists2_vec = [twists2[s2e], twists2[s2b]]
return (stem1_vec, twists1[s1b], stem2_vec, twists2[s2b], bulge_vec)
[docs]def stem2_pos_from_stem1(stem1, twist1, params):
'''
Get the starting point of a second stem, given the parameters
about where it's located with respect to stem1
:param stem1: The vector representing the axis of stem1's cylinder
:param twist1: The twist parameter of stem1
:param params: The parameters describing the position of stem2 wrt stem1
'''
(r, u, v) = params
stem2 = cuv.spherical_polar_to_cartesian((r, u, v))
stem1_basis = cuv.create_orthonormal_basis(stem1, twist1)
stem2_start = np.dot(stem1_basis.transpose(), stem2)
return stem2_start
[docs]def stem2_pos_from_stem1_1(transposed_stem1_basis, params):
'''
Get the starting point of a second stem, given the parameters
about where it's located with respect to stem1
The params of the stat describe the change in the coordinate system of stem1.
This function converts that to a carthesian vector the standard coordinate system
:param transposed_stem1_basis: The vtransposed basis of the first stem.
:param params: The parameters describing the position of stem2 wrt stem1
(i.e. the carthesian vector in standard coordinates pointing from stem1 to stem2)
'''
(r, u, v) = params
stem2 = cuv.spherical_polar_to_cartesian((r, u, v))
stem2_start = np.dot(transposed_stem1_basis, stem2)
return stem2_start
# Seems to be unused!
[docs]def twist2_orient_from_stem1(stem1, twist1, u_v_t):
'''
Calculate the position of the twist factor of the 2nd stem from its
parameters and the first stem.
:param stem1: The vector representing the axis of stem1's cylinder
:param twist1: The twist factor of stem1.
:param u_v_t: The parameters describing how the twist of stem2 is
oriented with respect to stem1. A triple `(u, v, t)`
'''
u, v, t = u_v_t
twist2_new = np.array([0., math.cos(t), math.sin(t)])
rot_mat1 = cuv.rotation_matrix("z", v)
rot_mat2 = cuv.rotation_matrix("y", u - math.pi / 2.)
rot_mat = np.dot(rot_mat2, rot_mat1)
twist2_new = np.dot(nl.inv(rot_mat), twist2_new)
'''
twist2_new = dot(inv(rot_mat2), twist2_new)
twist2_new = dot(inv(rot_mat1), twist2_new)
'''
stem1_basis = cuv.create_orthonormal_basis(stem1, twist1)
twist2_new_basis = cuv.change_basis(twist2_new, cuv.standard_basis,
stem1_basis)
return twist2_new_basis
[docs]def twist2_orient_from_stem1_1(stem1_basis, u_v_t):
'''
Calculate the position of the twist factor of the 2nd stem from its
parameters and the first stem.
:param stem1: The vector representing the axis of stem1's cylinder
:param twist1: The twist factor of stem1.
:param u_v_t: The parameters describing how the twist of stem2 is
oriented with respect to stem1. A triple `(u, v, t)`
'''
u, v, t = u_v_t
twist2_new = np.array([0., math.cos(t), math.sin(t)])
rot_mat1 = cuv.rotation_matrix("z", v)
rot_mat2 = cuv.rotation_matrix("y", u - math.pi / 2.)
rot_mat = np.dot(rot_mat2, rot_mat1)
#assert np.allclose(nl.inv(rot_mat), rot_mat.T)
twist2_new = np.dot(rot_mat.T, twist2_new)
twist2_new_basis = np.dot(stem1_basis, twist2_new)
return twist2_new_basis
[docs]def stem2_orient_from_stem1(stem1, twist1, r_u_v):
'''
Calculate the orientation of the second stem, given its parameterization
and the parameterization of stem1
:param stem1: The vector representing the axis of stem1's cylinder
:param twist1: The twist factor of stem1.
:param r_u_v: The orientation of stem2 wrt stem1, a triple `(r, u, v)`
'''
stem1_basis = cuv.create_orthonormal_basis(stem1, twist1)
return stem2_orient_from_stem1_1(stem1_basis.transpose(), r_u_v)
[docs]def stem2_orient_from_stem1_1(stem1_basis, r_u_v):
'''
Calculate the orientation of the second stem, given its parameterization
and the parameterization of stem1
:param stem1: The vector representing the axis of stem1's cylinder
:param twist1: The twist factor of stem1.
:param r_u_v: The orientation of stem2 wrt stem1, a triple `(r, u, v)`
'''
r, u, v = r_u_v
stem2_in_basis1 = cuv.spherical_polar_to_cartesian((r, u, v))
stem2 = np.dot(stem1_basis, stem2_in_basis1)
return stem2
[docs]def get_angle_stat_geometry(stem1_vec, twist1, stem2_vec, twist2, bulge_vec):
"""
:param stem1_vec: The vector of the first stem, pointing TOWARDS the bulge
:param twist1: The twist vector at the side of stem1 closest to the bulge
:param stem2_vec: The vector of the second stem, pointing AWAY FROM the bulge
:param twist2: The twist vector at the side of stem2 closest to the bulge
:param bulge_vec: The vector from stem1 to stem2
:returns: T 6-tuple: u,v (the orientation parameters),
t (twist parameter) and
r1, u1, v1 (the seperation parameters)
\ A
\ stem1 /
\ / stem2
V /
--------->
bulge
"""
try:
# Get the orientations for orienting these two stems
(r, u, v, t) = get_stem_orientation_parameters(stem1_vec, twist1,
stem2_vec, twist2)
(r1, u1, v1) = get_stem_separation_parameters(
stem1_vec, twist1, bulge_vec)
except ZeroDivisionError as e:
with log_to_exception(log, e):
log.error("Cannot get stat. The 3D coodinates are probably wrong.")
raise
return u, v, t, r1, u1, v1
[docs]@profile
def get_broken_ml_deviation(cg, broken_ml_name, fixed_stem_name, virtual_stat):
"""
If we assgin a stat to a broken ml-segment, how much would the attached
stem deviate from its true location.
Calculates the position of a '"virtual stem", which would be
placed after the broken ml-segment, if it was a true ml-segment
with the virtual stat assigned.
Then calculates the deviation between this virtual stem and the actual stem.
:param cg: The CoarseGrainRNA
:param broken_ml_name: The name of the ml-segment of interest.
It should not be part of cg.mst.
:param fixed_stem_name: The name of a stem (e.g. "s0") attached to the
broken ml segment. This stem will be used as reference,
For the other stem attached to the broken ml-segment (original_stem),
the virtual stem position will be calculated.
:param virtual_stat: The stat assigned to the broken ml segment, in the direction
from fixed_stem_name to the other stem.
:returns: A triple: positional_deviation, angular_deviation and twist_deviation.
positional_deviation measures how far the start of the virtual stem
is from the start of the true ("original") stem.
Angular deviation measures (in radians) the differece in the stem's orientation.
twist_deviation measures the angle (in radians) between the two stem's twist vectors.
"""
log.debug("Getting broken ML deviation for %s attached to %s "
"using stat %s", broken_ml_name, fixed_stem_name,
virtual_stat.pdb_name)
s1, s2 = cg.edges[broken_ml_name]
if s1 == fixed_stem_name:
orig_stem_name = s2
elif s2 == fixed_stem_name:
orig_stem_name = s1
else:
raise ValueError("fixed stem {} is not attached to ml {} with "
"edges {}".format(fixed_stem_name, broken_ml_name, [s1, s2]))
sides = cg.get_sides(fixed_stem_name, broken_ml_name)
fixed_s_vec = cg.coords.get_direction(fixed_stem_name)
if sides[0] == 0:
fixed_s_vec = -fixed_s_vec
s_twist = cg.twists[fixed_stem_name][sides[0]]
fixed_stem_basis = ftuv.create_orthonormal_basis(fixed_s_vec, s_twist)
vbulge_vec, vstem_vec, vstem_twist = _virtual_stem_from_bulge(
fixed_stem_basis, virtual_stat)
vstem_vec *= 5
vstem_coords0 = cg.coords[fixed_stem_name][sides[0]] + vbulge_vec
vstem_coords1 = vstem_coords0 + vstem_vec
sides2 = cg.get_sides(orig_stem_name, broken_ml_name)
orig_coords0 = cg.coords[orig_stem_name][sides2[0]]
orig_coords1 = cg.coords[orig_stem_name][sides2[1]]
orig_stem_vec = orig_coords1 - orig_coords0
true_bulge_vec = orig_coords0 - cg.coords[fixed_stem_name][sides[0]]
pos_dev = (ftuv.vec_distance(orig_coords0, vstem_coords0))
ang_dev = ftuv.vec_angle(vstem_vec, orig_stem_vec)
twist_dev = ftuv.vec_angle(
cg.twists[orig_stem_name][sides2[0]], vstem_twist)
log.debug("Deviation: pos %s, orient %s, twist: %s", pos_dev,
math.degrees(ang_dev), math.degrees(twist_dev))
# For debugging
#max_diff = 6
#max_adiff = math.radians(3*max_diff)
# if pos_dev < max_diff and ang_dev<max_adiff and twist_dev<2*max_adiff:
if False: # plotting-code used for debugging
pos_adev = ftuv.vec_angle(true_bulge_vec, vbulge_vec)
log.info("Deviation: pos %s, %s orient %s, twist: %s", pos_dev, math.degrees(pos_adev),
math.degrees(ang_dev), math.degrees(twist_dev))
log.info("Length: virtual: %s original: %s", ftuv.magnitude(
vstem_vec), ftuv.magnitude(orig_stem_vec))
import matplotlib.pyplot as plt
_plot_junction_2d(cg, broken_ml_name)
plt.plot([cg.coords[fixed_stem_name][sides[0]][0], cg.coords[orig_stem_name][sides2[0]][0]],
[cg.coords[fixed_stem_name][sides[0]][1],
cg.coords[orig_stem_name][sides2[0]][1]],
".-", label="true bulge")
plt.plot([cg.coords[fixed_stem_name][sides[0]][0], vstem_coords0[0]],
[cg.coords[fixed_stem_name][sides[0]][1], vstem_coords0[1]],
".-", label="virtual bulge")
plt.plot([vstem_coords0[0], vstem_coords1[0]],
[vstem_coords0[1], vstem_coords1[1]],
"s-", label="virtual" + orig_stem_name)
plt.legend()
plt.show()
return pos_dev, ang_dev, twist_dev
def _plot_element(cg, elem, style="o-", name_suffix=""):
import matplotlib.pyplot as plt
plt.plot([cg.coords[elem][0][0], cg.coords[elem][1][0]],
[cg.coords[elem][0][1], cg.coords[elem][1][1]],
style,
label=elem + name_suffix)
def _plot_junction_2d(cg, broken_ml):
"""
TODO: Move this to a proper location
"""
import matplotlib.pyplot as plt
plotted = set()
_plot_element(cg, broken_ml, name_suffix=" broken")
elem = cg.get_next_ml_segment(broken_ml)
while elem != broken_ml:
_plot_element(cg, elem)
for s in cg.edges[elem]:
if s not in plotted:
_plot_element(cg, s)
plotted.add(s)
elem = cg.get_next_ml_segment(elem)
def _virtual_stem_from_bulge(prev_stem_basis, stat):
"""
Return a virtual stem with length 1 that would be placed
by stat and prev_stem
:param prev_stem_basis: The basis of the previous stem.
:param stat: The angle stat that describes the orientation of the
virtual stem from the previous stem.
"""
transposed_stem1_basis = prev_stem_basis.transpose()
start_location = stem2_pos_from_stem1_1(
transposed_stem1_basis, stat.position_params())
stem_orientation = stem2_orient_from_stem1_1(transposed_stem1_basis,
[1] + list(stat.orientation_params()))
twist1 = twist2_orient_from_stem1_1(
transposed_stem1_basis, stat.twist_params())
return start_location, stem_orientation, twist1
[docs]def get_centroid(chain, residue_num):
"""
:param residue_num: A list of integers
"""
residue_num = [int(i) for i in residue_num]
#print >>sys.stderr, "residue_num:", residue_num
atoms = []
for i in residue_num:
try:
atoms += [chain[i][REFERENCE_CATOM]]
except KeyError:
# the C1* atom probably doesn't exist
continue
vectors = [atom.get_vector().get_array() for atom in atoms]
return cuv.get_vector_centroid(vectors)
# Seems to be unused!
[docs]def get_bulge_centroid(chain, define):
i = 0
res_nums = []
while i < len(define):
res_nums += range(int(define[i]), int(define[i + 1]) + 1)
i += 2
#print >>sys.stderr, "res_nums:", res_nums
return get_centroid(chain, res_nums)
[docs]def get_furthest_c_alpha(cg, chain, stem_end, d):
'''
Get the position of the c-alpha atom furthest from the end of the stem.
'''
seq_ids = True
max_dist = 0
furthest_pos = None
res_ids = it.chain(*cg.get_resseqs(d, seq_ids=seq_ids))
for chainId, i in res_ids: # seq_ids now contain chain
try:
c_apos = chain[i][REFERENCE_CATOM].get_vector().get_array()
except KeyError as ke:
print("Nucleotide %s missing in element %s" %
(str(i), d), file=sys.stderr)
continue
dist = cuv.magnitude(stem_end - c_apos)
if dist >= max_dist:
max_dist = dist
furthest_pos = c_apos
return furthest_pos
[docs]def stem_from_chains(cg, chains, elem_name):
"""
This function combines get_mids and get_twists into one more efficient routine.
:param chains: A dictionary {chain_id: Biopython_PDB_chain}
:param elem_name: e.g. "s0"
"""
stem_length = cg.stem_length(elem_name)
template_filename = 'ideal_1_%d_%d_%d.pdb' % (stem_length, stem_length + 1,
stem_length * 2)
filename = forgi.threedee.data_file(op.join('data', template_filename))
try:
ideal_chain = ftup.get_first_chain(filename)
except IOError:
if stem_length > 40:
raise CgConstructionError("Cannot create coordinates. "
"Helices with lengths greater than 40 are currently not supported in forgi.")
else:
raise
stem_chain = bpdb.Chain.Chain(' ')
try:
residue_ids = cg.get_resseqs(elem_name, seq_ids=True)
except IndexError as e:
with log_to_exception(log, e):
log.error("seq_ids were '%r'", cg.seq_ids)
raise
new_residue_ids = []
for strand in residue_ids:
for res_id in strand:
log.debug("Adding residue %s", res_id)
original_residue = chains[res_id.chain][res_id.resid]
residue = original_residue.copy()
try:
stem_chain.add(residue)
except Bio.PDB.PDBExceptions.PDBConstructionException as e:
log.info(
"Temporarily changing resid %s to uuid, because this id is present twice (with different chain) in one stem", residue.id)
change_residue_id(residue, uuid.uuid4())
stem_chain.add(residue)
new_residue_ids.append(residue.id)
rotran = ftup.pdb_rmsd(stem_chain, ideal_chain, sidechains=False,
superimpose=True )[2]
# average length of a base-pair: 2.547
mult = 0.01 # Stems with 1 bp have a tiny length
ideal_coords = np.array([[0., 0., mult],
np.array([0., 0., -mult]) + (stem_length - 1) * np.array([0., 0., -2.547])])
coords = np.dot(ideal_coords, rotran[0]) + rotran[1]
stem_direction = coords[1] - coords[0]
# the first nucleotide of the first strand
# and the last nucleotide of the second strand
first_res = residue_ids[0][0]
start_vec1 = chains[first_res.chain][first_res.resid][REFERENCE_CATOM].coord - coords[0]
last_res = residue_ids[0][-1]
end_vec1 = chains[last_res.chain][last_res.resid][REFERENCE_CATOM].coord - coords[1]
# the last nucleotide of the first strand
# and the first nucleotide of the second strand
first_res_a = residue_ids[1][-1]
try:
start_vec1a = chains[first_res_a.chain][first_res_a.resid][REFERENCE_CATOM].coord - coords[0]
except KeyError as e:
log.error("Atoms are %s", chains[first_res_a.chain][first_res_a.resid].child_dict)
raise
last_res_a = residue_ids[1][0]
end_vec1a = chains[last_res_a.chain][last_res_a.resid][REFERENCE_CATOM].coord - coords[1]
notch1 = cuv.vector_rejection(start_vec1, stem_direction)
notch2 = cuv.vector_rejection(end_vec1, stem_direction)
notch1a = cuv.vector_rejection(start_vec1a, stem_direction)
notch2a = cuv.vector_rejection(end_vec1a, stem_direction)
twists = (cuv.normalize(notch1 + notch1a), cuv.normalize(notch2 + notch2a))
# Perform some verification
if False:
verify_vatom_positions(residue_ids, chains, coords,
twists, "stem_{}_from_chain".format(elem_name))
return coords, twists
[docs]def verify_vatom_positions(residue_ids, chains, coords, twists, label=""):
"""
:param coords: The coords of ONE stem
"""
res1, res2 = residue_ids[0][0], residue_ids[1][-1]
res3, res4 = residue_ids[0][-1], residue_ids[1][0]
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
fig = plt.figure()
ax = Axes3D(fig)
strand0 = np.array(
[chains[r.chain][r.resid]["C1'"].coord for r in residue_ids[0]])
strand1 = np.array(
[chains[r.chain][r.resid]["C1'"].coord for r in residue_ids[1]])
ax.plot(strand0[:, 0], strand0[:, 1],
strand0[:, 2], "o-", label="forward strand")
ax.plot(strand1[:, 0], strand1[:, 1], strand1[:, 2],
"o-", label="backwards strand")
ax.plot([chains[res1.chain][res1.resid]["C1'"].coord[0], chains[res2.chain][res2.resid]["C1'"].coord[0]],
[chains[res1.chain][res1.resid]["C1'"].coord[1],
chains[res2.chain][res2.resid]["C1'"].coord[1]],
[chains[res1.chain][res1.resid]["C1'"].coord[2], chains[res2.chain][res2.resid]["C1'"].coord[2]], "--", label="bp")
ax.plot([chains[res3.chain][res3.resid]["C1'"].coord[0], chains[res4.chain][res4.resid]["C1'"].coord[0]],
[chains[res3.chain][res3.resid]["C1'"].coord[1],
chains[res4.chain][res4.resid]["C1'"].coord[1]],
[chains[res3.chain][res3.resid]["C1'"].coord[2], chains[res4.chain][res4.resid]["C1'"].coord[2]], "--", label="bp")
ax.plot(coords[:, 0], coords[:, 1], coords[:, 2], "o-", label="STEM")
twist1 = np.array([coords[0], coords[0] + twists[0],
coords[0] + twists[0] * 10])
twist2 = np.array([coords[1], coords[1] + twists[1],
coords[1] + twists[1] * 10])
ax.plot(twist1[:, 0], twist1[:, 1], twist1[:, 2], "o-", label="Twist1")
ax.plot(twist2[:, 0], twist2[:, 1], twist2[:, 2], "o-", label="Twist1")
# Virtual atoms
vres_pos = []
vres_bases = []
c1_vecs = []
for i, res in enumerate(residue_ids[0]):
pos = virtual_res_3d_pos_core(
coords, twists, i, len(residue_ids[0]))[0]
vres_pos.append(pos)
basis = virtual_res_basis_core(coords, twists, i, len(residue_ids[0]))
vres_bases.append(basis)
c1_vecs.append(ftuv.change_basis(
chains[res.chain][res.resid]["C1'"].coord - vres_pos[-1], vres_bases[-1], ftuv.standard_basis))
#print("C1 vecs:", c1_vecs)
av_c1_vec = np.sum(c1_vecs, axis=0) / len(c1_vecs)
#print("Av C1' vec = ", av_c1_vec)
virtual_c1s = []
for pos, basis in zip(vres_pos, vres_bases):
virtual_c1s.append(ftuv.change_basis(
av_c1_vec, ftuv.standard_basis, basis) + pos)
virtual_c1s = np.array(virtual_c1s)
ax.plot(virtual_c1s[:, 0], virtual_c1s[:, 1],
virtual_c1s[:, 2], "o", label="virtual C1'")
ax.set_title(label)
ax.legend()
plt.show()
#assert False
[docs]def total_helix_rotation(coords, twists, stem_len):
"""
Calculate the total rotation of the helix in radians from the twists.
When we calculate the angle between the two twists, we only know
the true rotation modulo 2*pi (i.e. a rotation of 45 degrees could
mean 45 degrees or 405 degrees). Depending on the number of nucleotides and
knowledge of the ideal helix (which turns roughly 30 degrees per base-pair),
this function outputs the correct result.
"""
stem_vec = coords[1] - coords[0]
# the angle of the second twist with respect to the first
stem_basis = cuv.create_orthonormal_basis(stem_vec, twists[0])
t2 = cuv.change_basis(twists[1], stem_basis, cuv.standard_basis)
twist_angle = ftum.atan3(t2[2], t2[1])
# calculated from an ideal length 30 helix
average_ang_per_nt = 0.636738030735
expected_total_ang = (stem_len - 1) * average_ang_per_nt
expected_twist_ang = expected_total_ang % (2 * math.pi)
[docs]def virtual_res_3d_pos_core(coords, twists, i, stem_len, stem_inv=None):
'''
Calculate the virtual position of the i'th nucleotide in the stem.
The virtual position extrapolates the position of the residues based
on the twists of the helix.
:return: A tuple containing the point located on the axis of the stem
and a vector away from that point in the direction of the
residue.
'''
#stem_len = bg.defines[stem][1] - bg.defines[stem][0] + 1
stem_vec = coords[1] - coords[0]
# the position of the virtual residue along the axis of
# the stem
if stem_len == 1:
vres_stem_pos = coords[0]
else:
vres_stem_pos = coords[0] + (i / float(stem_len - 1)) * stem_vec
# the angle of the second twist with respect to the first
if stem_inv is None:
stem_basis = cuv.create_orthonormal_basis(stem_vec, twists[0])
t2 = cuv.change_basis(twists[1], stem_basis, cuv.standard_basis)
else:
t2 = np.dot(stem_inv, twists[1])
ang = ftum.atan3(t2[2], t2[1])
# calculated from an ideal length 30 helix
average_ang_per_nt = 0.636738030735
expected_ang = (stem_len - 1) * average_ang_per_nt
expected_dev = expected_ang
while (expected_dev - (2 * math.pi) > 0):
expected_dev -= 2 * math.pi
# expected_dev uis now between 0 and 360 degrees
if ang < expected_dev:
forward = 2 * math.pi + ang - expected_dev
backward = expected_dev - ang
else:
forward = ang - expected_dev
backward = 2 * math.pi + expected_dev - ang
if forward < backward:
ang = expected_ang + forward
else:
ang = expected_ang - backward
if stem_len == 1:
ang = 0.
else:
ang_per_nt = ang / float(stem_len - 1)
ang = ang_per_nt * i
# the basis vectors for the helix along which the
# virtual residues will residue
u = twists[0]
v = cuv.normalize(np.cross(stem_vec, twists[0]))
ang_offset = 0.9
# equation for a circle in 3-space
return (vres_stem_pos,
u * math.cos(ang) + v * math.sin(ang),
u * math.cos(ang + ang_offset) + v * math.sin(ang + ang_offset),
u * math.cos(ang - ang_offset) + v * math.sin(ang - ang_offset))
[docs]def virtual_res_3d_pos(bg, stem, i, stem_inv=None, stem_length=None):
if stem_length is None:
return virtual_res_3d_pos_core(bg.coords[stem], bg.twists[stem], i,
bg.stem_length(stem), stem_inv)
else:
return virtual_res_3d_pos_core(bg.coords[stem], bg.twists[stem], i,
stem_length, stem_inv)
[docs]def virtual_res_basis_core(coords, twists, i, stem_len, vec=None):
'''
Define a basis based on the location of a virtual stem residue.
The basis will be defined by the direction of the stem, the direction
of the virtual residue.
:param bg: The BulgeGraph structure
:param stem: The name of the stem
:param i: The i'th residue of the stem
:return: A 3x3 matrix defining the coordinate system above.
'''
if vec is None:
(pos, vec, vec_l, vec_r) = virtual_res_3d_pos_core(coords, twists,
i, stem_len)
stem_vec = coords[1] - coords[0]
return cuv.create_orthonormal_basis(stem_vec, vec)
[docs]def virtual_res_basis(bg, stem, i, vec=None):
return virtual_res_basis_core(bg.coords[stem], bg.twists[stem], i,
bg.stem_length(stem), vec)
[docs]def pos_to_spos(bg, s1, i1, s2, i2):
'''
Convert the location of s2, i2 into the coordinate system
defined by (s1, i1)
:param bg: The BulgeGraph containing the stems
:param s1: The basis stem name
:param i1: The basis res position
:param s2: The stem containing the nucleotide to be converted
:param i2: The nucleotide to be converted position
'''
sbasis = virtual_res_basis(bg, s1, i1)
(s1_pos, s1_vec, s1_vec_l, s1_vec_r) = virtual_res_3d_pos(bg, s1, i1)
(s2_pos, s2_vec, s2_vec_l, s2_vec_r) = virtual_res_3d_pos(bg, s2, i2)
#rpos = (s2_pos + 7. * s2_vec) - (s1_pos + 7 * s1_vec)
rpos = (s2_pos + 7. * s2_vec) - (s1_pos)
# print "sbasis:", sbasis
spos = cuv.change_basis(rpos, sbasis, cuv.standard_basis)
'''
if spos[1] ** 2 + spos[2] ** 2 < 5 and spos[0] > -5 and spos[0] < 5:
print >>sys.stderr, "spos:", spos, s1, i1, s2, i2
'''
return spos
"""
def spos_to_pos(bg, stem, i, spos):
'''
Convert the location of spos from the coordinate system
of (stem, i) into the standard coordinate system.
:param bg: The BulgeGraph
:param stem: The name of the stem in the BulgeGraph
:param i: The i'th residue in 'stem' which will define the coordinate
system
:param spos: The position in the alternate coordinate system
:return: The coordinates in the cartesian coordinate system of the
rest of the model.
'''
if stem in bg.vbases and i in bg.vbases[stem]:
sbasis=bg.vbases[stem][i]
else:
sbasis = virtual_res_basis(bg, stem, i)
pos = cuv.change_basis(spos, cuv.standard_basis, sbasis)
try:
(s1_pos, s1_vec, s1_vec_l, s1_vec_r) = bg.v3dposs[stem][i]
except KeyError as e:
log.info("in spos_to_pos: KeyError {}. Adding virtual residues for stem {}".format(e, stem))
add_virtual_residues(bg, stem)
(s1_pos, s1_vec, s1_vec_l, s1_vec_r) = bg.v3dposs[stem][i]
#return pos + (s1_pos + s1_vec) #TODO BT: THIS SEEMS WRONG
return pos + s1_pos
"""
[docs]def get_residue_type(i, stem_len):
'''
Each nucleotide will be classified according to its position
within the stem. That way, the distribution of surrounding
nucleotides will be conditioned on the type of nucleotides.
This is important due to the fact that nucleotides at the end
of a stem may have other stem nucleotides in the direction
of the stem vector. Nucleotides, in the middle shoubulge not due
to the excluded volume of the stem they occupy.
:param i: The position of the nucleotide.
:param stem_len: The length of the stem.
:return: The type of nucleotide position.
'''
assert(i < stem_len)
return 0
[docs]def junction_virtual_res_distance(bg, bulge):
'''
Compute the distance between the two virtual residues flanking
a bulge region.
:param bg: The BulgeGraph containing the bulge.
:param bulge: The name of the bulge.
'''
cs = list(bg.edges[bulge])
(s1b, s1e) = bg.get_sides(cs[0], bulge)
(s2b, s2e) = bg.get_sides(cs[1], bulge)
if s1b == 1:
res = bg.v3dposs[cs[0]][bg.stem_length(cs[0]) - 1]
else:
res = bg.v3dposs[cs[0]][0]
(vr1_p, vr1_v, vr1_v_l, vr1_v_r) = res
if s2b == 1:
res = bg.v3dposs[cs[1]][bg.stem_length(cs[1]) - 1]
else:
res = bg.v3dposs[cs[1]][0]
(vr2_p, vr2_v, vr2_v_l, vr2_v_r) = res
dist2 = cuv.vec_distance((vr1_p + 7. * vr1_v), (vr2_p + 7. * vr2_v))
return dist2
"""
def get_strand_atom_vrn(bg, s, i):
'''
Return the strand and which atom to use for the adjacent
nucleotide distance calculation.
'''
if i == 0:
return (0, 'P', 0)
# this might have to just be bg.stem_length(s)
if i == 1:
return (0, 'O3*', bg.stem_length(s) - 1)
if i == 2:
return (1, 'P', bg.stem_length(s) - 1)
if i == 3:
return (1, 'O3*', 0)
"""
[docs]def junction_virtual_atom_distance(bg, bulge):
'''
Compute the distance between the O3' atom and P' atom
of the two residues that flank the junction segment.
:param bg: The BulgeGraph containing the bulge.
:param bulge: The name of the bulge
:return: A single number corresponding to the distance above.
'''
connecting_stems = bg.connections(bulge)
(i1, k1) = bg._get_sides_plus(connecting_stems[0], bulge)
(i2, k2) = bg._get_sides_plus(connecting_stems[1], bulge)
pos1 = bg.defines[connecting_stems[0]][i1]
pos2 = bg.defines[connecting_stems[1]][i2]
if bulge[0] == "m":
assert list(sorted([pos1, pos2])) == bg.flanking_nucleotides(bulge)
if i1 == 0 or i1 == 2:
a1 = "P"
else:
a1 = "O3'"
if i2 == 0 or i2 == 2:
a2 = "P"
else:
a2 = "O3'"
assert a1 != a2
dist = cuv.magnitude(bg.virtual_atoms(
pos1)[a1] - bg.virtual_atoms(pos2)[a2])
# if bg.element_length(bulge)==0:
# partner1 = bg.pairing_partner(pos1)
# partner2 = bg.pairing_partner(pos2)
# dist2 = cuv.magnitude(bg.virtual_atoms(pos1)[a2]-bg.virtual_atoms(pos2)[a1])
# dist3 = cuv.magnitude(bg.virtual_atoms(partner1)[a1]-bg.virtual_atoms(partner2)[a2])
# dist4 = cuv.magnitude(bg.virtual_atoms(partner1)[a2]-bg.virtual_atoms(partner2)[a1])
# assert dist < dist2, "{} ({} nts): {} !< {}".format(bulge, bg.element_length(bulge), dist, dist2)
# assert dist < dist3, "{} ({} nts): {} !< {}".format(bulge, bg.element_length(bulge), dist, dist3)
# assert dist < dist4, "{} ({} nts): {} !< {}".format(bulge, bg.element_length(bulge), dist, dist4)
return dist
[docs]@profile
def add_virtual_residues(bg, element):
'''
Create all of the virtual residues and the associated
bases and inverses for the given stem.
.. note::
This is a low-level function used if only the virtual residues of a single
stems should be added. To add the virtual residues for all stems, use
`cg.add_all_virtual_residues`
:param bg: The CoarseGrainRNA bulge graph containing the stem
:param element: The name of the stem to be included
'''
if element[0] == "s":
return _add_stem_virtual_residues(bg, element)
else:
return _add_loop_virtual_residues(bg, element)
def _add_loop_virtual_residues(cg, element):
if not cg.chains:
log.info(
"No virtual residues added for %s, because no pdb chain present", element)
return
for i, resid in enumerate(cg.define_residue_num_iterator(element, seq_ids=True)):
try:
global_coords = cg.chains[resid.chain][resid.resid]["C1'"].coord
except KeyError:
log.warning("Added virtual residue position for residue %s will be "
"inaccurate, because no C1' is present. Atoms are %s", resid,
list(cg.chains[resid.chain][resid.resid].child_dict.keys()))
p=np.zeros(3)
i=0
for atom in cg.chains[resid.chain][resid.resid]:
p+=atom.coord
i+=1
global_coords = p/i
origin, basis = element_coord_system(cg, element)
element_coords = ftuv.change_basis(
global_coords - origin, basis, ftuv.standard_basis)
cg.vposs[element][i] = element_coords
def _add_stem_virtual_residues(bg, stem):
stem_vec = bg.coords.get_direction(stem)
twist_vec = bg.get_twists(stem)[0]
if stem in bg.bases and np.allclose(stem_vec, bg.bases[stem][0]) and np.allclose(twist_vec, bg.bases[stem][1]):
stem_inv = bg.stem_invs[stem]
else:
stem_basis = cuv.create_orthonormal_basis(stem_vec, twist_vec)
stem_inv = nl.inv(stem_basis.transpose())
bg.bases[stem] = stem_basis
bg.stem_invs[stem] = stem_inv
for i in range(bg.stem_length(stem)):
vpos = virtual_res_3d_pos(bg, stem, i, stem_inv=stem_inv)
vbasis = virtual_res_basis(bg, stem, i, vec=vpos[1])
vinv = nl.inv(vbasis.transpose())
bg.vposs[stem][i] = vpos[0]
bg.vvecs[stem][i] = vpos[1]
bg.v3dposs[stem][i] = vpos
bg.vbases[stem][i] = vbasis
bg.vinvs[stem][i] = vinv
[docs]def stem_vres_reference_atoms(bg, s, i):
'''
Calculate the position of each atom in the reference of the
stem and virtual residue.
:param bg: The BulgeGraph
:param s: The stem identifier
:param i: The i'th base-pair in the stem
:return (origin, basis, [dict(atoms), dict(atoms)])
The origin of the coordinate system (vpos)
The basis of the virtual residue
Two dictionaries containing the positions of each atom in the coordinate system of the virtual residue
'''
coords = [dict(), dict()]
(vpos, vvec, vvec_l, vvec_r) = virtual_res_3d_pos(bg, s, i)
#vec1 = cuv.normalize(bg.coords[s][1] - bg.coords[s][0])
#vec2 = cuv.normalize(vvec)
stem_direction = bg.coords[s][1] - bg.coords[s][0]
twist = vvec
basis = cuv.create_orthonormal_basis(stem_direction, twist)
residue_ids = bg.get_resseqs(s, seq_ids=True)
for strand in [0, 1]:
if strand == 0:
res_id = residue_ids[0][i]
else:
res_id = residue_ids[1][-(1 + i)]
for atom in ftup.all_rna_atoms:
res = bg.chains[res_id.chain][res_id.resid]
try:
c = res[atom].coord
except KeyError:
continue
else:
new_c = cuv.change_basis(c - vpos, basis, cuv.standard_basis)
log.debug("Atom %s has coords %s", atom, new_c)
coords[strand][atom] = new_c
return (vpos, basis, coords)
[docs]def bounding_boxes(bg, s, i):
'''
Return the bounding boxes of the two nucleotides at the
i'th position on the stem.
:param bg: A BulgeGraph where bg.chains is not None
:param s: The stem identifier
:param i: The i'th base-pair in the stem
:return: (origin, basis, [(c1, c2), (c1, c2)]) The bases
and the corners defining the bounding box
of the two nucleotides
'''
(vpos, basis, atoms) = stem_vres_reference_atoms(bg, s, i)
corners = []
for k in range(2):
min_c = [10000., 10000., 10000.]
max_c = [-10000., -10000., -10000.]
for atom in atoms[k].values():
for j in range(3):
min_c[j] = min(min_c[j], atom[j])
max_c[j] = max(max_c[j], atom[j])
n = min_c
x = max_c
corners += [(n, x)]
return (vpos, basis, corners)
[docs]def virtual_residue_atoms(bg, s, i, strand=0):
'''
Return the atoms for the virtual residue.
:param bg: The BulgeGraph
:param s: The stem
:param i: The virtual residue number
:param strand: The strand for which to get the virtual atoms
'''
'''
if vpos == None or vvec == None:
(vpos, vvec, vvec_l, vvec_r) = virtual_res_3d_pos(bg, s, i)
if basis == None:
basis = virtual_res_basis(bg, s, i, vvec).transpose()
'''
if s[0] != "s":
raise ValueError(
"Expected stem (not single-stranded RNA element), got {}".format(s))
glob_pos = (bg.defines[s][0] + i, bg.defines[s][3] - i)
glob_pos = glob_pos[strand]
return bg.virtual_atoms(glob_pos)
[docs]def calc_R(xc, yc, p):
""" calculate the distance of each 2D points from the center (xc, yc) """
return np.sqrt((p[:, 0] - xc) ** 2 + (p[:, 1] - yc) ** 2)
[docs]def f_2(c, p):
""" calculate the algebraic distance between the data points and the mean
circle centered at c=(xc, yc) """
Ri = calc_R(*c, p=p)
return Ri - Ri.mean()
[docs]def circle_fit(p):
x = p[:, 0]
y = p[:, 1]
x_m = np.mean(x)
y_m = np.mean(y)
u = x - x_m
v = y - y_m
# linear system defining the center (uc, vc) in reduced coordinates:
# Suu * uc + Suv * vc = (Suuu + Suvv)/2
# Suv * uc + Svv * vc = (Suuv + Svvv)/2
Suv = sum(u * v)
Suu = sum(u ** 2)
Svv = sum(v ** 2)
Suuv = sum(u ** 2 * v)
Suvv = sum(u * v ** 2)
Suuu = sum(u ** 3)
Svvv = sum(v ** 3)
# Solving the linear system
A = np.array([[Suu, Suv], [Suv, Svv]])
B = np.array([Suuu + Suvv, Svvv + Suuv]) / 2.0
uc, vc = nl.solve(A, B)
xc_1 = x_m + uc
yc_1 = y_m + vc
return (xc_1, yc_1)
'''
Ri_1 = sqrt((x-xc_1)**2 + (y-yc_1)**2)
R_1 = mean(Ri_1)
residu_1 = sum((Ri_1-R_1)**2)
return (xc_1, yc_1, R_1)
'''
[docs]def circle_error(c, p):
errors = f_2(c, p)
return sum([e ** 2 for e in errors])
[docs]def f_3(vec, points, est):
""" calculate the optimal circle for the points (p) projected onto
the plane orthogonal to v """
basis = cuv.create_orthonormal_basis(vec)
new_points = cuv.change_basis(points.T, basis, cuv.standard_basis).T
p = new_points[:, 1:]
#center_2, ier=so.leastsq(f_2, center_estimate,args=p)
center_2 = circle_fit(p)
return f_2(center_2, p)
[docs]def fit_circle(mids, points, start_pos, end_pos):
'''
Calculate the projection of points on the plane normal to
vec and fit a circle to them.
'''
with warnings.catch_warnings():
warnings.simplefilter("ignore")
v1, ier = so.leastsq(f_3, mids[1] - mids[0],
args=(points, mids[0][1:]))
basis1 = cuv.create_orthonormal_basis(v1)
points1 = cuv.change_basis(points.T, basis1, cuv.standard_basis).T
start_pos1 = cuv.change_basis(start_pos, basis1, cuv.standard_basis)
end_pos1 = cuv.change_basis(end_pos, basis1, cuv.standard_basis)
center_5 = circle_fit(points1[:, 1:])
mids_stem_basis = [[start_pos1[0], center_5[0], center_5[1]],
[end_pos1[0], center_5[0], center_5[1]]]
mids_standard_basis = cuv.change_basis(np.array(mids_stem_basis).T,
cuv.standard_basis, basis1).T
'''
# works!
mids_stem_basis = [[nmids[0][0], center_4[0], center_4[1]],
[nmids[1][0], center_4[0], center_4[1]]]
mids_standard_basis = cuv.change_basis(np.array(mids_stem_basis).T,
cuv.standard_basis,
basis).T
'''
return mids_standard_basis
#cg.sampled[d] = [cg.name] + cg.defines[d]
[docs]def get_incomplete_elements(cg):
"""
Get an estimated list of cg-elements which have missing residues in the PDB.
One of many problems with PDB data are residues, for which no
coordinates could be determined experimentally. This function gives
an estimated list of cg-elements, which are affected by missing residues.
"""
incomplete = set()
for elem in cg.defines:
for r in cg.define_range_iterator(elem, adjacent=elem[0] != "s"):
if cg.seq[r[0]:r[1]] != cg.seq.with_missing[r[0]:r[1]]:
incomplete.add(elem)
return incomplete
def _add_loop_vres(cg):
if len(cg.defines) < 2:
return # fifeprime only-cgs have no twists
log.debug("Adding virtual residues")
for elem in cg.defines:
if elem[0] != "s":
try:
add_virtual_residues(cg, elem)
except:
log.warning("Could not add virtual residues from PDB for %s, elem %s", cg.name, elem)
[docs]def cylinder_works(cg, cylinders_to_stems, tv, c, r=4.):
'''
Check if all of these points are inside the cylinder.
'''
points = [cg.coords[tv][0], cg.coords[tv][1]]
for s in cylinders_to_stems[c]:
points += [cg.coords[s][0], cg.coords[s][1]]
data = np.array(points)
datamean = data.mean(axis=0)
uu, dd, vv = np.linalg.svd(data - datamean)
n = vv[0]
p = data
a = datamean
dist_vec = (a - p) - (np.dot((a - p), n)[:, np.newaxis]) * n # pylint: disable=invalid-sequence-index
mags = [ftuv.magnitude(c) for c in dist_vec]
'''
linepts = vv[0] * np.mgrid[-7:7:2j][:, np.newaxis]
linepts += datamean
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d as m3d
ax = m3d.Axes3D(plt.figure())
ax.scatter3D(*data.T)
ax.plot3D(*linepts.T)
'''
if max(mags) > r:
return False
return True
[docs]def get_encompassing_cylinders(cg, radius=6.):
visited = set()
# the stems_in_cylinders dictionary will be indexed by stem name and contain
# the number of the cylinder it contains
#stems_to_cylinders = {'s0': 0}
stems_to_cylinders = dict()
cylinders_to_stems = col.defaultdict(list)
#cylinders_to_stems = {0: ['s0']}
# the first cylinder is equal to the first stem
#cylinders = {0: cg.coords['s0']}
to_visit = [random.choice(list(cg.defines.keys()))]
cylinder_counter = 0
while to_visit:
tv = to_visit.pop(0)
if tv in visited:
continue
visited.add(tv)
for e in cg.edges[tv]:
to_visit.append(e)
# not interested in non- stem, multiloop or interior loop elements
if tv[0] != 's' and tv[0] != 'm' and tv[0] != 'i':
continue
#cylinders_to_check = set(cylinders_to_stems.keys())
cylinders_to_check = set()
# find which cylinders we need to check
for e in cg.edges[tv]:
if e in stems_to_cylinders:
cylinders_to_check.add(stems_to_cylinders[e])
found = False
for c in sorted(cylinders_to_check, key=lambda x: -sum([cg.stem_length(k) for k in cylinders_to_stems[x]])):
# the new node will definitely be at the end of the cylinder
# print "checking...:", c, tv
if cylinder_works(cg, cylinders_to_stems, tv, c, radius):
cylinders_to_stems[c] += [tv]
stems_to_cylinders[tv] = c
found = True
break
if not found:
# no appropriately sized cylinder has been found so we
# just create new one containing just this stem
cylinder_counter += 1
cylinders_to_stems[cylinder_counter] += [tv]
stems_to_cylinders[tv] = cylinder_counter
return cylinders_to_stems
[docs]def element_coord_system(cg, d):
'''
Get a coordinate system for a particular coarse grain element.
If an element has an axis vector, a, twist vectors t1 and t2,
then the coordinate system will be a normalized version
of the axis a, the second, v2, will be equal to norm((t1 + t2) / 2.)
And the third will be equal to a x v2.
'''
vec_axis = ftuv.normalize(cg.coords[d][1] - cg.coords[d][0])
twists = cg.get_twists(d)
mid_twist = ftuv.normalize(twists[0] + twists[1])
assert abs(np.dot(vec_axis, twists[0])) < 10**- \
10, "{}: {}".format(d, abs(np.dot(vec_axis, twists[0])))
assert abs(np.dot(vec_axis, twists[1])) < 10**-10
return (((cg.coords[d][0] + cg.coords[d][1]) / 2.),
ftuv.create_orthonormal_basis(vec_axis, mid_twist))
[docs]def virtual_atoms(cg, given_atom_names=None, sidechain=True):
'''
Get a list of virtual atoms for this structure.
:param cg: The coarse grain structure.
'''
return VirtualAtomsLookup(cg, given_atom_names, sidechain)
# Module-level var used for caching.
_average_atom_positions = None
[docs]class VirtualAtomsLookup(object):
"""
An object with a dict-like interface that calculates the virtual atom positions on demand.
"""
def __init__(self, cg, given_atom_names=None, sidechain=True):
"""
:param cg: The coarse grain structure, for which the virtual atoms are generated.
..note ::
If cg is modified, new virtual atom positions are calculated.
"""
self.cg = cg
self.given_atom_names = given_atom_names
self.sidechain = sidechain
#@profile
def __getitem__(self, position):
"""
:returns: A dictionary containing all atoms (as keys) and their
positions (as values) for the given residue.
:param position: The position of the residue in the RNA (starting with 1)
"""
# Find out the stem for which we have to calculate virtual atom positions
for key, value in self.cg.defines.items():
if len(value) < 2:
continue # For multiloops of length 0, value is []
elif position >= value[0] and position <= value[1]:
return self._getitem_for_element(key, position)
elif len(value) == 4 and position >= value[2] and position <= value[3]:
return self._getitem_for_element(key, position)
assert False, "No return for pos {}".format(position)
#@profile
[docs] def keys(self):
k = set()
for value in self.cg.defines.values():
if len(value) > 1:
for i in range(value[0], value[1] + 1):
k.add(i)
if len(value) > 3:
for i in range(value[2], value[3] + 1):
k.add(i)
return k
def _getitem_for_element(self, d, pos):
"""
:returns: A dictionary containing all atoms (as keys) and
their positions (as values) for the given residue.
:param d: The coarse grained element (e.g. "s1")
:param pos: The position of the residue. It has to be in the element d!
"""
global _average_atom_positions
if d[0] == "s":
# Use virtual residues for stems.
return self._getitem_for_stem(d, pos)
if _average_atom_positions is None:
log.info("LOADING AV_ATOM_POS")
import pkgutil
data = pkgutil.get_data(
'forgi', 'threedee/data/average_atom_positions.json')
_average_atom_positions = json.loads(data.decode("ascii"))
log.debug("Using loaded av_atom_pos")
e_coords = dict()
try:
origin, basis = element_coord_system(self.cg, d)
except ValueError as e:
# 0-length hairpin.
if d[0] == "h" and np.array_equal(self.cg.coords[d][0], self.cg.coords[d][1]):
warnings.warn(
"Returning empty set of virtual atoms for 0-length hairpin")
return e_coords
else:
raise
print(e, "for position {} in element {} with define {}".format(
pos, d, self.cg.defines[d]))
raise
if d[0] == 'i' or d[0] == 'm':
conn = self.cg.connections(d)
conn_type = self.cg.connection_type(d, conn)
else:
conn_type = 0
for i, r in zip(it.count(), self.cg.define_residue_num_iterator(d)):
if r != pos:
continue
if self.given_atom_names is None:
if self.sidechain:
# Seq is now 1-based
atom_names = (ftup.nonsidechain_atoms + [
self.cg.seq[r] + "." + x for x in ftup.side_chain_atoms[self.cg.seq[r]]])
else:
atom_names = ftup.nonsidechain_atoms
else:
atom_names = self.given_atom_names
for aname in atom_names:
identifier = "%s %s %d %d %s" % (d[0],
" ".join(
map(str, self.cg.get_node_dimensions(d))),
conn_type, i, aname)
if "." in aname:
_, _, aname = aname.partition(".")
try:
e_coords[aname] = origin + ftuv.change_basis(
np.array(_average_atom_positions[identifier]), ftuv.standard_basis, basis)
except KeyError as ke:
#warnings.warn("KeyError in virtual_atoms. No coordinates found for: {}".format(ke))
pass
return e_coords
def _getitem_for_stem(self, d, pos):
log.debug("getitem_for_stem %s, pos %s", d, pos)
pos_in_stem, side = self.cg.stem_resn_to_stem_vres_side(d, pos)
assert pos >= 1
try:
residue = (self.cg.seq[pos])
except IndexError as e:
with log_to_exception(log, e):
log.error("position {} not in sequence {}".format(
pos - 1, self.cg.seq))
raise
if self.given_atom_names is None:
if self.sidechain:
atom_names = (ftup.nonsidechain_atoms +
ftup.side_chain_atoms[residue])
else:
atom_names = ftup.nonsidechain_atoms
else:
atom_names = self.given_atom_names
atom_keys = []
atom_coords = []
for aname in atom_names:
if aname[-1] == "*":
aname_dash = aname[:-1] + "'"
else:
aname_dash = aname
spos = ftus.avg_stem_vres_atom_coords[side][residue][aname_dash]
# TODO: Maybe we can vectorize this and calculate pos from spos for all atoms of the residue at once.
atom_keys.append(aname)
atom_coords.append(spos)
try:
# virtual_res_basis(self.cg, d, pos_in_stem)
vres_basis = self.cg.vbases[d][pos_in_stem]
# virtual_res_3d_pos(self.cg, d, pos_in_stem)[0]
vres_pos = self.cg.vposs[d][pos_in_stem]
except KeyError:
self.cg.add_all_virtual_residues()
# virtual_res_basis(self.cg, d, pos_in_stem)
vres_basis = self.cg.vbases[d][pos_in_stem]
# virtual_res_3d_pos(self.cg, d, pos_in_stem)[0]
vres_pos = self.cg.vposs[d][pos_in_stem]
atom_coords = ftuv.change_basis_vectorized(
np.array(atom_coords), ftuv.standard_basis, vres_basis) + vres_pos
return {aname: coord for aname, coord in zip(atom_keys, atom_coords)}
[docs]def vres_to_global_coordinates(vres_pos, vres_basis, positions):
newpos = {}
for key, v_pos in positions.items():
pos = ftuv.change_basis(v_pos, ftuv.standard_basis, vres_basis)
newpos[key] = pos + vres_pos
return newpos
[docs]def element_distance(cg, l1, l2):
'''
Calculate the distance between the two closest points of these
two elements.
'''
(i1, i2) = ftuv.line_segment_distance(cg.coords[l1][0],
cg.coords[l1][1],
cg.coords[l2][0],
cg.coords[l2][1])
return ftuv.vec_distance(i1, i2)
[docs]def get_basepair_center(cg, pos):
"""
The center of a basepair, as defined in doi: 10.1261/rna.305307
:param pos: The number of one of the two pairing bases
"""
pos2 = cg.pairing_partner(pos)
seq1 = cg.seq[pos - 1]
seq2 = cg.seq[pos2 - 1]
atoms = {"A": ["C1'", "C8"], "G": ["C1'", "C8"],
"U": ["C1'", "C6"], "C": ["C1'", "C6"]}
va1 = cg.virtual_atoms(pos)
va2 = cg.virtual_atoms(pos2)
avpos = np.zeros(3)
try:
for atom in atoms[seq1]:
avpos += va1[atom]
for atom in atoms[seq2]:
avpos += va2[atom]
except KeyError:
log.error("%s\n%s", va1.keys(), va2.keys())
raise
avpos /= (len(atoms[seq1]) + len(atoms[seq2]))
return avpos
[docs]def get_basepair_plane(cg, pos):
"""
The plane of the basepair, as defined in figure 13 of doi: 10.1261/rna.305307
:param pos: The number of one of the two pairing bases
"""
pos2 = cg.pairing_partner(pos)
seq1 = cg.seq[pos - 1]
seq2 = cg.seq[pos2 - 1]
va1 = cg.virtual_atoms(pos)
va2 = cg.virtual_atoms(pos2)
h_bonds = {"U": {"A": [("O4", "N6"), ("N3", "N1")],
"G": [("N3", "O6"), ("O2", "N1")]},
"A": {"U": [("N6", "O4"), ("N1", "N3")]},
"G": {"U": [("O6", "N3"), ("N1", "O2")],
"C": [("O6", "N4"), ("N1", "N3"), ("N2", "O2")]},
"C": {"G": [("N4", "O6"), ("N3", "N1"), ("O2", "N2")]}
}
#print( seq1, seq2 )
try:
hb = h_bonds[seq1][seq2]
except KeyError:
# Non-canonical basepair
warnings.warn("Estimating plane from stem vector for "
" non-canonical basepair {}-{} at positions"
" {},{}".format(seq1, seq2, pos, pos2))
# ValueError, if cg.pairing_partner is buggy
stem, = cg.nucleotides_to_elements([pos, pos2])
return cg.coords[stem][0] - cg.coords[stem][1]
else:
plane = np.zeros(3)
contribs = 0
for l1, l2 in it.combinations(hb, 2):
left_1 = va1[l1[0]]
left_2 = va1[l2[0]]
right_1 = va2[l1[1]]
right_2 = va2[l2[1]]
add = np.cross(right_1 - left_1, right_2 - left_1)
if np.any(plane != np.zeros(3)):
assert ftuv.vec_angle(add, plane) < math.radians(15), ("{}-{}: {}, {}: {}"
" degrees".format(seq1, seq2, plane, add, math.degrees(ftuv.vec_angle(add, plane))))
plane += add
add = np.cross(right_1 - left_1, left_2 - right_1)
assert ftuv.vec_angle(add, plane) < math.radians(15), ("{}-{}: {}, {}: {}"
" degrees".format(seq1, seq2, plane, add, math.degrees(ftuv.vec_angle(add, plane))))
plane += add
add = np.cross(right_2 - left_2, right_2 - left_1)
assert ftuv.vec_angle(add, plane) < math.radians(15), ("{}-{}: {}, {}: {}"
" degrees".format(seq1, seq2, plane, add, math.degrees(ftuv.vec_angle(add, plane))))
plane += add
add = np.cross(right_2 - left_2, left_2 - right_1)
assert ftuv.vec_angle(add, plane) < math.radians(15), ("{}-{}: {}, {}: {}"
" degrees".format(seq1, seq2, plane, add, math.degrees(ftuv.vec_angle(add, plane))))
plane += add
return ftuv.normalize(plane)