Source code for RNAsketch.State

#!/usr/bin/env python
'''
    State.py: Class as wrapper for ViennaRNA and Nupack
    functions to design an RNA molecule.
    This implements the various states of a riboswitch.
'''

__author__ = "Stefan Hammer"
__copyright__ = "Copyright 2016"
__version__ = "0.1"
__maintainer__ = "Stefan Hammer"
__email__ = "s.hammer@univie.ac.at"


import re
import math
import sys

vrna_available = True
nupack_available = True
pKiss_available = True
HotKnots_available = True

try:
    import RNA
    #RNA.read_parameter_file('/usr/share/ViennaRNA/rna_turner1999.par')
except ImportError as e:
    vrna_available = False
    sys.stderr.write("-" * 60 + "\nWARNING: " + str(e) + "!!!\n" + "-" * 60 + "\n")
    sys.stderr.flush()

try:
    import nupack
except ImportError as e:
    nupack_available = False
    sys.stderr.write("-" * 60 + "\nWARNING: " + str(e) + "!!!\n" + "-" * 60 + "\n")
    sys.stderr.flush()

try:
    import pKiss
except ImportError as e:
    pKiss_available = False
    sys.stderr.write("-" * 60 + "\nWARNING: " + str(e) + "!!!\n" + "-" * 60 + "\n")
    sys.stderr.flush()

try:
    import HotKnots
except ImportError as e:
    HotKnots_available = False
    sys.stderr.write("-" * 60 + "\nWARNING: " + str(e) + "!!!\n" + "-" * 60 + "\n")
    sys.stderr.flush()

if not (vrna_available or nupack_available or pKiss_available or HotKnots_available):
    raise ImportError("Neither ViennaRNA package, Nupack or pKiss found!")

[docs]class State(object): ''' State object holds structure, ligand, constraints and calculates all values for this state. :param structure: Dot-bracket structure string :param parent: Parent Design object ''' def __init__(self, parent, structure=None, temperature=37.0, ligand=None, constraint=None, enforce_constraint=False): if not isinstance(structure, basestring): raise ValueError('Not a structure string: ' + repr(structure)) self._structure = structure self._parent = parent self.reset() self._temperature = temperature self._ligand = ligand self._constraint = constraint self._enforce_constraint = enforce_constraint self._length = None self._cut_points = None self._multifold = None
[docs] def reset(self): self._eos = None self._pos = None self._eos_diff_mfe = None self._eos_reached_mfe = None self._mfe_structure = None self._mfe_energy = None self._pf_structure = None self._pf_energy = None self._ensemble_defect = None
@property def structure(self): ''' :return: Dot-bracket structure as constraint for this state ''' return self._structure @property def temperature(self): ''' :return: Temperature of this state ''' return self._temperature @temperature.setter def temperature(self, t): if not isinstance(t, float): raise ValueError('Temperature must be a float pointing value') self.reset() self._temperature = t @property def ligand(self): ''' :return: Ligand binding definitions for this state; List with sequence pattern, structure pattern and binding energy (soft constraint) ''' return self._ligand @ligand.setter def ligand(self, lig): if (len(lig) != 3) or not isinstance(lig, list): raise ValueError('Ligand must be a list with three items: sequence pattern, structure pattern and binding energy') self.reset() self._ligand = lig @property def constraint(self): ''' :return: Structural constraint of this state (hard constraint) ''' return self._constraint @constraint.setter def constraint(self, constraint): if constraint: create_bp_table(constraint) #check for balanced brackets if (len(constraint) != self.length): raise ValueError('constraint and sequence must have equal length!') self.reset() self._constraint = constraint @property def enforce_constraint(self): ''' :return: Boolean whether to enforce the hard constraint or not ''' return self._enforce_constraint @enforce_constraint.setter def enforce_constraint(self, enforced): if enforced is not self._enforce_constraint: self.reset() self._enforce_constraint = enforced @property def length(self): ''' :return: Length of the design sequence if available, otherwise of the stuctural input ''' if not self._length: if self._parent and self._parent.sequence: self._length = len(self._parent.sequence) else: self._length = len(self._structure) return self._length @property def cut_points(self): ''' :return: List containing the positions of the cut points (& or +) in the sequence ''' if not self._cut_points and self._structure: self._cut_points = [] iterator = re.finditer(re.compile('\&|\+'), self._structure) for match in iterator: self._cut_points.append(match.start()+1) return self._cut_points @property def multifold(self): ''' :return: Number of cut points to specify if it is a cofold or multifold problem ''' if not self._multifold: self._multifold = len(self.cut_points) return self._multifold @property def classtype(self): return None @property def eos(self): ''' :return: Energy of structure given all the properties of this state (constraints, temperature,...) ''' if not self._eos and self._parent.sequence and self._structure: self._eos = self._get_eos(self._parent.sequence, self._structure, self.temperature, self.ligand) return self._eos @property def pos(self): ''' :return: Probability of structure given all the properties of this state (constraints, temperature,...) ''' if not self._pos and self._parent.sequence: self._pos = math.exp((self.pf_energy-self.eos) / self._get_KT(self.temperature) ) return self._pos @property def eos_diff_mfe(self): ''' :return: Energy difference of structure to mfe given all the properties of this state (constraints, temperature,...) ''' if not self._eos_diff_mfe and self._parent.sequence: self._eos_diff_mfe = self.eos - self.mfe_energy return self._eos_diff_mfe @property def eos_reached_mfe(self): ''' :return: Boolean defining whether the eos reached the mfe energy ''' if not self._eos_reached_mfe and self._parent.sequence: if (self.eos == self.mfe_energy): self._eos_reached_mfe = 1 else: self._eos_reached_mfe = 0 return self._eos_reached_mfe @property def mfe_energy(self): ''' :return: MFE value given all the properties of this state (constraints, temperature,...) ''' if not self._mfe_energy and self._parent.sequence: self._calculate_mfe_energy_structure() return self._mfe_energy @property def mfe_structure(self): ''' :return: MFE structure given all the properties of this state (constraints, temperature,...) ''' if not self._mfe_structure and self._parent.sequence: self._calculate_mfe_energy_structure() return self._mfe_structure def _calculate_mfe_energy_structure(self): (structure, energie) = self._get_fold(self._parent.sequence, self.temperature, self.ligand, self.constraint) self._mfe_energy = energie self._mfe_structure = structure @property def pf_energy(self): ''' In case of a dimer cofold calculation with viennarna, the function returns the energy of true hybrid states only. :return: Partition function energy value given all the properties of this state (constraints, temperature,...) ''' if not self._pf_energy and self._parent.sequence: self._calculate_pf_energy_structure() return self._pf_energy @property def pf_structure(self): ''' :return: Partition function consensus structure given all the properties of this state (constraints, temperature,...) ''' if not self._pf_structure and self._parent.sequence: self._calculate_pf_energy_structure() return self._pf_structure def _calculate_pf_energy_structure(self): (structure, energie) = self._get_pf_fold(self._parent.sequence, self.temperature, self.ligand, self.constraint) self._pf_energy = energie self._pf_structure = structure @property def ensemble_defect(self): ''' :return: Ensemble defect value given all the properties of this state (constraints, temperature,...) ''' if not self._ensemble_defect and self._parent.sequence and self._structure: if (len(self._parent.sequence) != len(self._structure)): raise ValueError('sequence and structure must have equal length to calculate the ensemble defect!') self._ensemble_defect = self._get_ensemble_defect(self._parent.sequence, self._structure, self.temperature, self.ligand) return self._ensemble_defect def _get_KT(self, temperature): # KT = (betaScale*((temperature+K0)*GASCONST))/1000.0; /* in Kcal */ return ((temperature + 273.15)*1.98717)/1000.0; def _get_eos(self, sequence, structure, temperature, ligand): raise NotImplementedError def _get_fold(self, sequence, temperature, ligand, constraint): raise NotImplementedError def _get_pf_fold(self, sequence, temperature, ligand, constraint): raise NotImplementedError def _get_ensemble_defect(self, sequence, structure, temperature, ligand): raise NotImplementedError
if vrna_available:
[docs] class vrnaState(State): @property def classtype(self): return 'vrna' def _change_cuts(self, input): return re.sub('[+]', '&', input) def _get_fold_compound(self, sequence, temperature, ligand=None, constraint=None, options=RNA.OPTION_PF): md = RNA.md() md.temperature = temperature md.dangles = 2 fc = RNA.fold_compound(self._change_cuts(sequence), md, options) if ligand: fc.sc_add_hi_motif(ligand[0], ligand[1], ligand[2]) if constraint: if self._enforce_constraint: fc.hc_add_from_db(remove_cuts(constraint), RNA.CONSTRAINT_DB_DEFAULT | RNA.CONSTRAINT_DB_ENFORCE_BP) else: fc.hc_add_from_db(remove_cuts(constraint)) return fc def _get_eos(self, sequence, structure, temperature, ligand=None): if self.multifold > 1: raise NotImplementedError fc = self._get_fold_compound(sequence, temperature, ligand, options=RNA.OPTION_MFE | RNA.OPTION_EVAL_ONLY) return fc.eval_structure(remove_cuts(structure)) def _get_fold(self, sequence, temperature, ligand=None, constraint=None): fc = self._get_fold_compound(sequence, temperature, ligand, constraint, options=RNA.OPTION_MFE) if self.multifold == 0: (structure, energie) = fc.mfe() if self.multifold == 1: (structure, energie) = fc.mfe_dimer() structure = add_cuts(structure, self.cut_points) if self.multifold > 1: raise NotImplementedError return (structure, energie) def _get_pf_fold(self, sequence, temperature, ligand=None, constraint=None): fc = self._get_fold_compound(sequence, temperature, ligand, constraint) if self.multifold == 0: (structure, energie) = fc.pf() if self.multifold == 1: # returns (string structure, float *FA, float *FB, float *FcAB, float *FAB) (structure, _, _, energie, _) = fc.pf_dimer() structure = add_cuts(structure, self.cut_points) elif self.multifold > 1: raise NotImplementedError return (structure, energie) def _get_ensemble_defect(self, sequence, structure, temperature, ligand=None): #TODO finish implementation with pairing matrix. need to ask ronny how it is done now or use old interface fc = self._get_fold_compound(sequence, temperature, ligand) if self.multifold == 0: _,_ = fc.pf() if self.multifold == 1: _,_ = fc.pf_dimer() structure = add_cuts(structure, self.cut_points) elif self.multifold > 1: raise NotImplementedError # get base pairing probability matrix bpm = fc.bpp() # get base pair table bpt = create_bp_table(structure) # delta(i,j, s) = 1 if (i,j) in s, 0 otherwise # d(s1, s2) = sum{i,j in s1}(1-delta{i,j}(s2)) + sum{i,j not in s1}(delta{i,j}(s2)) # ensemble defect = sum{i,j in structure}(1-P(i,j)) + sum{i,j not in structure}(P(i,j)) result = 0 for i in range(0, len(structure)): for j in range(i, len(structure)): if (bpt[i] == j): #print('{0:1.0f}/{1:1.0f}: {2:10.10f}'.format(i, j, bpm[i+1][j+1])) result += 1-bpm[i+1][j+1] else: result += bpm[i+1][j+1] return 2 * result
if nupack_available:
[docs] class nupackState(State): @property def classtype(self): return 'nupack' def _change_cuts(self, input): return re.sub('[&]', '+', input) def _get_eos(self, sequence, structure, temperature, ligand=None): #TODO nupack.energy can not handle unconnected cofold structures return nupack.energy([self._change_cuts(sequence)], self._change_cuts(structure), material = 'rna', pseudo = True, T = temperature) def _get_fold(self, sequence, temperature, ligand=None, constraint=None): nupack_mfe = nupack.mfe([self._change_cuts(sequence)], material = 'rna', pseudo = True, T = temperature) # if str, 0, no error pattern = re.compile('(\[\(\')|(\',)|(\'\)\])') temp_mfe = pattern.sub('', "%s" %nupack_mfe) temp_mfe = temp_mfe.replace("'", "") mfe_list = temp_mfe.split() mfe_struct = mfe_list[0] mfe_energy = float(mfe_list[1]) return mfe_struct, mfe_energy def _get_pf_fold(self, sequence, temperature, ligand=None, constraint=None): # Nupack doesn't return ensemble structure return re.sub('[^\+]', '?', self._change_cuts(sequence)), nupack.pfunc([sequence], material = 'rna', pseudo = True, T = temperature) def _get_ensemble_defect(self, sequence, structure, temperature, ligand=None): return nupack.defect([self._change_cuts(sequence)], structure, material = 'rna', pseudo = True, T = temperature)
if pKiss_available:
[docs] class pKissState(State): @property def classtype(self): return 'pKiss' def _change_cuts(self, input): if re.match(r'[&+]', input): raise IOError('pKiss cannot handle concatenated RNAs') return input def _get_eos(self, sequence, structure, temperature, ligand=None): #TODO pKiss cannot handle ligands # pkiss eval returns multiple answers to the eval question, take lowest energy! shape, energy, structure = pKiss.eval(self._change_cuts(sequence), self._change_cuts(structure), temperature = temperature) return energy def _get_fold(self, sequence, temperature, ligand=None, constraint=None): return pKiss.mfe(self._change_cuts(sequence), temperature = temperature) def _get_pf_fold(self, sequence, temperature, ligand=None, constraint=None): #TODO return mfe as a bad approximation it cannot calculate the partition function return pKiss.mfe(self._change_cuts(sequence), temperature = temperature) def _get_ensemble_defect(self, sequence, structure, temperature, ligand=None): raise NotImplementedError
if HotKnots_available:
[docs] class hotknotsState(State): @property def classtype(self): return 'hotknots' def _change_cuts(self, input): if re.match(r'[&+]', input): raise IOError('Hotknots cannot handle concatenated RNAs') return input def _get_eos(self, sequence, structure, temperature, ligand=None): #TODO Hotknots cannot handle ligands and temperature structure, energy = HotKnots.eval(self._change_cuts(sequence), self._change_cuts(structure)) return energy def _get_fold(self, sequence, temperature, ligand=None, constraint=None): #TODO Hotknots cannot handle ligands and temperature return HotKnots.mfe(self._change_cuts(sequence)) def _get_pf_fold(self, sequence, temperature, ligand=None, constraint=None): #TODO return mfe as a bad approximation it cannot calculate the partition function return HotKnots.mfe(self._change_cuts(sequence)) def _get_ensemble_defect(self, sequence, structure, temperature, ligand=None): raise NotImplementedError
[docs]def remove_cuts(input): ''' Takes a string and removes all cut characters (+&) from this string :param input: string containing cut point characters :return: string without cut point characters ''' return re.sub('[+&]', '', input)
[docs]def add_cuts(input, cut_points): ''' Takes a string and add back the cut point characters at the positions stored in the cut points list :param input: string without cut point characters :return: string containing cut point characters ''' result = input for cut in cut_points: result = result[:cut-1] + '&' + result[cut-1:] return result
[docs]def create_bp_table(structure): ''' Takes a structure in dot bracket notation and returns a base pair table. Unpaired positions are -1, otherwise the index of the adjacent bracket is listed :param structure: string with dot-bracket notation of the strcture :return bpt: base pair table ''' bpo=[] bpt=[-1]*len(structure) for i, substr in enumerate(structure): if(substr=="("): bpo.append(i) elif(substr==")"): try: bpt[bpo.pop()] = i except: raise ValueError('Unbalanced brackets: too few opening brackets') if len(bpo) > 0: raise ValueError('Unbalanced brackets: too few closing brackets') return bpt