forgi 2.0.0 documentation

Contents

Source code for forgi.graph.transform_graphs

"""
This module has functions which take a Bulge-Graph as input
and return a modified copy of it.

This is implemented as a class which is accessible as
BulgeGraph.transformed. This way we can properly inherit in CoarseGrainRNA
"""
import copy
import logging

from .sequence import MissingResidue, Sequence


log = logging.getLogger(__name__)


class _GCDummy(object):
    """
    Minimal stand-in that can be passed in place of a GraphConstruction
    object for BG-Initialization.

    It does nothing except expose the two values it was created with as
    the attributes ``defines`` and ``edges``.
    """

    def __init__(self, defines, edges):
        # Both objects are stored by reference; no copy is made here.
        self.defines, self.edges = defines, edges


class BGTransformer(object):
    """
    Create modified copies of the BulgeGraph it wraps.

    :param bg: The graph to transform. It is stored as ``self.bg``.
    """

    def __init__(self, bg):
        self.bg = bg

    def _without_elements(self, elems):
        """
        Return a copy of the BulgeGraph without the elements in elems.
        Their residues will be converted to missing residues.

        This is not implemented yet: calling it always raises.

        :param elems: A list of element names, e.g. ["s1","s2"]
        :raises NotImplementedError: Always.
        """
        raise NotImplementedError("This is still work in progress.")
        # NOTE: Everything below is an unreachable draft that is kept for
        # whoever finishes this method. It does not build or return a new
        # graph yet.
        #
        # We use the PDB numbering in new_defines, so we do not have to
        # adjust indices if we remove residues in the middle.
        resid_defines = {}
        # .items() is required here: iterating the dict directly would yield
        # only the element names.
        for k, v in self.bg.defines.items():
            resid_defines[k] = list(map(self.bg.seq.to_resid, v))
        new_edges = copy.deepcopy(self.bg.edges)
        to_missing = []
        for elem in elems:
            if elem[0] != "i":
                raise NotImplementedError("TODO")
            stem1, stem2 = new_edges[elem]
            elem_define_a = list(
                map(self.bg.seq.to_resid, self.bg.define_a(elem)))
            # Remove the iloop
            to_missing.extend(
                self.bg.define_residue_num_iterator(elem, seq_ids=True))
            new_edges[stem1].remove(elem)
            new_edges[stem2].remove(elem)
            del new_edges[elem]
            # Merge stem2 into stem1
            new_edges[stem1] |= new_edges[stem2]
            del new_edges[stem2]
            if resid_defines[stem1][1] in elem_define_a:
                # stem1 - il - stem2
                resid_defines[stem1][1] = resid_defines[stem2][1]
                resid_defines[stem1][2] = resid_defines[stem2][2]
            else:
                # stem2 - il - stem1
                resid_defines[stem1][0] = resid_defines[stem2][0]
                resid_defines[stem1][3] = resid_defines[stem2][3]
            del resid_defines[stem2]
            del resid_defines[elem]

    def condensed(self):
        """
        Return a condensed copy of the BulgeGraph.

        In the condensed BulgeGraph only the first (most 5-prime)
        nucleotide or base-pair of each element is retained, and the other
        nts/ base-pairs are converted to missing residues.
        In basepairs the first basepair contains the most 5' and most 3'
        nucleotide of the stem.

        :returns: A new object of the same type as ``self.bg``, named
                  ``self.bg.name + "_condensed"``.
        """
        log.debug("Condensing BG with break-points %s",
                  self.bg.backbone_breaks_after)
        log.info("Condensing Graph %s", self.bg.to_dotbracket_string())
        new_defines = {}
        new_seqids = []
        new_seq = ""
        new_missing = self.bg.seq.with_missing.export_missing()
        # Position the next retained nucleotide gets in the condensed graph.
        new_i = 1
        for elem in self.bg.iter_elements_along_backbone():
            if not self.bg.defines[elem]:
                # Elements with an empty define stay empty.
                new_defines[elem] = []
                continue
            if elem in new_defines:
                # Backwards strand for stems:
                if len(new_defines[elem]) != 2:
                    log.error("%s", self.bg.edges[elem])
                assert len(new_defines[elem]) == 2, "{} doesn't have len 2".format(
                    new_defines[elem])
                assert elem[0] in "si"
                fr, to = self.bg.defines[elem][2:]
                if elem[0] == "s":
                    # to keep basepairing consistent, we keep the last
                    # (not the first) nt.
                    keep_i = to
                else:
                    # To keep interior loops consistent with the case where
                    # it is only at the second strand, keep the first nt here
                    keep_i = fr
                new_defines[elem].extend([new_i, new_i])
                log.debug("Extended new_defines for %s to %s",
                          elem, new_defines[elem])
            else:
                # First time we see this element: keep its first nucleotide.
                fr, to = self.bg.defines[elem][:2]
                keep_i = fr
                new_defines[elem] = [new_i, new_i]
                log.debug("Set new_defines for %s to %s",
                          elem, new_defines[elem])
            new_i += 1
            new_seq += self.bg.seq[keep_i]
            new_seqids.append(self.bg.seq.to_resid(keep_i))
            for i in range(fr, to + 1):
                if i != keep_i:
                    # Every nucleotide that is not retained becomes a
                    # missing residue.
                    seq_id = self.bg.seq.to_resid(i)
                    new_missing.append(
                        MissingResidue(seq_id, self.bg.seq[i]))
                if i in self.bg.backbone_breaks_after:
                    if i >= keep_i:
                        # The break is at or after the retained nt.
                        new_seq += "&"
                    else:
                        # The break is before the retained nt, which is
                        # already the last character of new_seq.
                        new_seq = new_seq[:-1] + "&" + new_seq[-1]
                    log.debug("Breakpoint %s: new_seq now %s", i, new_seq)
        log.info("Condensing iteration done. Now creating condensed BG")
        graph_constr = _GCDummy(new_defines, copy.deepcopy(self.bg.edges))
        seq = Sequence(new_seq, new_seqids, new_missing,
                       self.bg.seq._modifications)
        return type(self.bg)(graph_constr, seq,
                             name=self.bg.name + "_condensed",
                             _dont_split=True)

Contents