From dd179e57ac45079a745be6e929bcd4304a62cdbc Mon Sep 17 00:00:00 2001
From: Peter Schubert <Peter.Schubert@hhu.de>
Date: Thu, 10 Nov 2022 20:51:34 +0100
Subject: [PATCH] determine linear pathways with net stoichiometry

---
 modelpruner/graph/linear_pathway.py  | 450 +++++++++++++++------------
 modelpruner/graph/linear_pathways.py |  85 ++---
 modelpruner/graph/metabolite_node.py |   2 -
 3 files changed, 303 insertions(+), 234 deletions(-)

diff --git a/modelpruner/graph/linear_pathway.py b/modelpruner/graph/linear_pathway.py
index fbf2b95..baa43aa 100644
--- a/modelpruner/graph/linear_pathway.py
+++ b/modelpruner/graph/linear_pathway.py
@@ -18,38 +18,135 @@ class LinearPathway:
         init_sid: initial pathway node
         ng: Class Network Graph to work on
         """
-
         self.init_sid = init_sid
         self.balance_c2_sids = set(balance_c2_sids)
         self.ng = network_graph
         self.net_reactants = {}
         self.net_products = {}
+        self.segments = []
+        self.c2_sids_pwy = []
+        self.inconsistant = False
 
-        # construct the linear pathway
-        self.segments = [self.get_initial_segment()]
-        self.add_downstream_segments()
-        self.add_upstream_segments()
+        # construct the main linear pathway
+        main_segments = self.get_lin_pwy_containing(init_sid)
+        self.segments.extend(main_segments)
 
-        # collect pathway data
-        self.sids = []
-        self.rids = [self.segments[0].producing.rid]
-        self.reversible = self.segments[0].reversible
-        scale = 1.0
-        direction = 1.0 if self.segments[0].producing.forward is True else -1.0
-        self.rids_scale = {self.segments[0].producing.rid: scale * direction}
-        for pwy_seg in self.segments:
-            self.sids.append(pwy_seg.sid)
-            self.rids.append(pwy_seg.consuming.rid)
-            scale *= pwy_seg.producing.stoic / pwy_seg.consuming.stoic
-            direction = 1.0 if pwy_seg.consuming.forward is True else -1.0
-            self.rids_scale[pwy_seg.consuming.rid] = scale * direction
+        main_data = self.collect_pwy_data(main_segments)
+        self.c2_sids_pwy.extend(main_data['sid_seq'])
+        self.rids_scale = main_data['rids_scale']
 
+        # get_net_stoichiometry for current sub_network
         net_stoic = self.get_net_stoichiometry()
         self.net_reactants = {sid: -stoic for sid, stoic in net_stoic.items() if stoic < 0.0}
         self.net_products = {sid: stoic for sid, stoic in net_stoic.items() if stoic > 0.0}
+        c2_reactants = set(self.net_reactants.keys()).intersection(self.balance_c2_sids)
+        c2_products = set(self.net_products.keys()).intersection(self.balance_c2_sids)
+
+        # remove from balance sids the consumed sids
+        self.balance_c2_sids -= set(main_data['sid_seq'])
+
+        # adding branches to incorporate connected c2 nodes
+        connected_c2_sids = c2_reactants | c2_products
+        while len(connected_c2_sids) > 0:
+            branch_sid = connected_c2_sids.pop()
+
+            branch_segments = self.get_lin_pwy_containing(branch_sid)
+            self.segments.extend(branch_segments)
+
+            branch_data = self.collect_pwy_data(branch_segments)
+            self.c2_sids_pwy.extend(branch_data['sid_seq'])
+            self.rids_scale |= self.rescale_branch(branch_sid, branch_segments, branch_data)
+
+            net_stoic = self.get_net_stoichiometry()
+            self.net_reactants = {sid: -stoic for sid, stoic in net_stoic.items() if stoic < 0.0}
+            self.net_products = {sid: stoic for sid, stoic in net_stoic.items() if stoic > 0.0}
+            c2_reactants = set(self.net_reactants.keys()).intersection(self.balance_c2_sids)
+            c2_products = set(self.net_products.keys()).intersection(self.balance_c2_sids)
+            self.balance_c2_sids -= set(branch_data['sid_seq'])
+
+            connected_c2_sids = c2_reactants | c2_products
+
+    def rescale_branch(self, branch_sid, branch_segments, branch_data):
+        """Rescale branch reactions, considering joining reaction.
+
+        :param branch_sid: c2 metabolite used as reactant or product in main pathway
+        :type branch_sid: str
+        :param branch_segments: pathway segments of the branch
+        :type branch_segments: list of PathwaySegments
+        :param branch_data: data collected from the branch
+        :type branch_data: dict
+        :return: rescaled reactions of the branch, excluding the joint reaction
+        :rtype: dict (keys: reaction ids, values: reaction scale)
+        """
+        pwy_idx = branch_data['sid_seq'].index(branch_sid)
+        joining_seg = branch_segments[pwy_idx]
+
+        # determine joining reaction with main pathway
+        if joining_seg.producing.rid in self.rids_scale:
+            joint_rid = joining_seg.producing.rid
+        else:
+            joint_rid = joining_seg.consuming.rid
+
+        # rescale reactions in the branch wrt to the joint reaction with the main pathway
+        branch_rids_scale = branch_data['rids_scale']
+        main_scale = self.rids_scale[joint_rid] / branch_rids_scale[joint_rid]
+        rids_scale = {}
+        for branch_rid, branch_scale in branch_rids_scale.items():
+            if branch_rid not in self.rids_scale:
+                rids_scale[branch_rid] = branch_scale * main_scale
+        return rids_scale
+
+    def collect_pwy_data(self, pwy_segs):
+        """Collect date from linear pathway
+
+        Determine sequence of metabolites and sequence of reactions
+        Determine reversivility of the pathway
+        Determine relative scale of the reaction in the pathway
+          Initial reaction set at a scale of 1.0
+          Subsequent reactions scaling depend on input vs.
+            output stoichiometry of a pathway segment as well
+            as of reaction directionality used in the pathway
+
+        :param pwy_segs: pathway segments of the pathway
+        :type pwy_segs: list of PathwaySegments
+        :return: information from linear pathway
+        :rtype: dict
+        """
+        sid_seq = []
+        rid_seq = [pwy_segs[0].producing.rid]
+        reversible = pwy_segs[0].reversible
+        scale = 1.0
+        direction = 1.0 if pwy_segs[0].producing.forward is True else -1.0
+        rids_scale = {pwy_segs[0].producing.rid: scale * direction}
+        for pwy_seg in pwy_segs:
+            sid_seq.append(pwy_seg.sid)
+            rid_seq.append(pwy_seg.consuming.rid)
+            scale *= pwy_seg.producing.stoic / pwy_seg.consuming.stoic
+            direction = 1.0 if pwy_seg.consuming.forward is True else -1.0
+            rids_scale[pwy_seg.consuming.rid] = scale * direction
+        return {'init_sid': self.init_sid, 'sid_seq': sid_seq, 'rid_seq': rid_seq,
+                'rids_scale': rids_scale, 'reversible': reversible}
+
+    def get_lin_pwy_containing(self, c2_sid):
+        """Determine the linear pathway containing a specific c2 node.
+
+        :param c2_sid: metabolite with connectivity 2
+        :type c2_sid: str
+        :return: pathway segments building the linear pathway
+        :rtype: list of PathwaySegments
+        """
+        segments = [self.get_initial_segment(c2_sid)]
+        segments = self.add_downstream_segments(segments)
+        segments = self.add_upstream_segments(segments)
+        return segments
 
     def get_net_stoichiometry(self):
+        """determine net stoichiometry of the pathway
 
+        considering direction scale, including directionality
+        :return: overall metabolite stoichiometry
+        :rtype: dict (keys: metabolites ids; values: stoichiometry)
+        """
         substrates = set()
         for rid in self.rids_scale:
             substrates |= set(self.ng.edges[rid].reactants)
@@ -62,37 +159,39 @@ class LinearPathway:
             for sid, stoic in self.ng.edges[rid].products.items():
                 net_stoic[sid] += stoic * scale
 
-        return net_stoic
+        return {sid: stoic for sid, stoic in net_stoic.items() if stoic != 0.0}
 
-    def get_initial_segment(self):
+    def get_initial_segment(self, c2_sid):
         """get initial pathway segment for inital node.
 
         Node with upstream and downstream connectivity,
         from where connect subsequently other downstream
         and upstream pathway segments
 
+        :param c2_sid: metabolite with connectivity 2
+        :type c2_sid: str
         :return: inital pathway segment
         :rtype: PathwaySegment
         """
-        self.balance_c2_sids.remove(self.init_sid)
-        connectivity = self.get_us_ds_neighbors(self.init_sid)
+        self.balance_c2_sids.remove(c2_sid)
+        connectivity = self.get_us_ds_neighbors(c2_sid)
         return PathwaySegment(connectivity)
 
-    def add_downstream_segments(self):
+    def add_downstream_segments(self, segments):
         """add downstream segments """
 
         lcount = 1
-        upstream_seg = self.segments[-1]
+        upstream_seg = segments[-1]
         while ((upstream_seg.consuming is not None) and
                (upstream_seg.consuming.nb is not None) and
-               (lcount < 50)):
+               (lcount < 100)):
             lcount += 1
             pwy_rev = upstream_seg.reversible
             self.balance_c2_sids.remove(upstream_seg.consuming.nb)
             connectivity = self.get_ds_neighbor(upstream_seg)
             pwy_seg = PathwaySegment(connectivity)
             pwy_seg.set_reversible(pwy_rev and connectivity['reversible'])
-            self.segments.append(pwy_seg)
+            segments.append(pwy_seg)
 
             if connectivity['consuming'] is None:
                 # termination of the pathway at a c1 metabolite (blocked node)
@@ -103,45 +202,47 @@ class LinearPathway:
                 if pwy_rev is True:
                     if connectivity['reversible'] is False:
                         # irreversible termination in a reversible pathway
-                        for pwy_seg in self.segments:
+                        for pwy_seg in segments:
                             pwy_seg.set_reversible(False)
                     else:
                         # reversible termination in a reversible pathway
-                        for pwy_seg in self.segments:
+                        for pwy_seg in segments:
                             pwy_seg.reverse_direction()
-                        self.segments.reverse()
+                        segments.reverse()
 
             if connectivity['consuming'].nb is not upstream_seg.sid:
                 # downstream connectivity to a c2 node
                 if (pwy_rev is True) and (connectivity['reversible'] is False):
-                    for pwy_seg in self.segments:
+                    for pwy_seg in segments:
                         pwy_seg.set_reversible(False)
             elif pwy_rev is True:
                 # pathway reversal situation
                 assert connectivity['reversible'] is False
-                for pwy_seg in self.segments[:-1]:
+                for pwy_seg in segments[:-1]:
                     pwy_seg.set_reversible(False)
                     pwy_seg.reverse_direction()
-                self.segments.reverse()
+                segments.reverse()
             else:
+                self.inconsistant = True
                 print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}')
                 break
-            upstream_seg = self.segments[-1]
+            upstream_seg = segments[-1]
+        return segments
 
-    def add_upstream_segments(self):
+    def add_upstream_segments(self, segments):
 
         lcount = 1
-        downstream_seg = self.segments[0]
+        downstream_seg = segments[0]
         while ((downstream_seg.producing is not None) and
                (downstream_seg.producing.nb in self.balance_c2_sids) and
-               (lcount < 50)):
+               (lcount < 100)):
             lcount += 1
             pwy_rev = downstream_seg.reversible
             self.balance_c2_sids.remove(downstream_seg.producing.nb)
             connectivity = self.get_us_neighbor(downstream_seg)
             pwy_seg = PathwaySegment(connectivity)
             pwy_seg.set_reversible(pwy_rev and connectivity['reversible'])
-            self.segments.insert(0, pwy_seg)
+            segments.insert(0, pwy_seg)
 
             if connectivity['producing'] is None:
                 # termination of the pathway at a c1 metabolite (blocked node)
@@ -150,26 +251,28 @@ class LinearPathway:
             if connectivity['producing'].nb is None:
                 # termination of the pathway with connectivity towards the network
                 if (pwy_rev is True) and (connectivity['reversible'] is False):
-                    for pwy_seg in self.segments:
+                    for pwy_seg in segments:
                         pwy_seg.set_reversible(False)
                 break
 
             if connectivity['producing'].nb is not downstream_seg.sid:
                 # upstream connectivity to a c2 node
                 if (pwy_rev is True) and (connectivity['reversible'] is False):
-                    for pwy_seg in self.segments:
+                    for pwy_seg in segments:
                         pwy_seg.set_reversible(False)
             elif pwy_rev is True:
                 # pathway reversal situation
                 assert connectivity['reversible'] is False
-                for pwy_seg in self.segments[:-1]:
+                for pwy_seg in segments[:-1]:
                     pwy_seg.set_reversible(False)
                     pwy_seg.reverse_direction()
-                self.segments.reverse()
+                segments.reverse()
             else:
+                self.inconsistant = True
                 print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}')
                 break
-            downstream_seg = self.segments[0]
+            downstream_seg = segments[0]
+        return segments
 
     def get_neighbor_node(self, substrates):
         """Get a c2 node from reactants/product sids
@@ -201,136 +304,112 @@ class LinearPathway:
         :rtype: dict
         """
         cur_node = self.ng.nodes[init_sid]
+        assert cur_node.connectivity == 2
 
         rids = [rid for rid in (cur_node.consuming_rids | cur_node.producing_rids)]
         revs = [val['rev'] for val in (cur_node.consuming_rids | cur_node.producing_rids).values()]
         rev = bool(np.all(np.array(revs)))
-        producing_leg = None
-        consuming_leg = None
 
-        if cur_node.connectivity == 1:
-            # c1 nodes are actually blocked nodes where a pathway ends
-            rid = rids[0]
+        if rev is True:
+            # process a node with reversible fluxes on both sides
+            #  one being considered as producing, one as consuming reaction
+            nbs = [None, None]
+            stoics = [0.0, 0.0]
+            fwds = [True, True]
+
+            # arbitrarily select first reaction as producing and second as consuming
+            if rids[0] in cur_node.consuming_rids:
+                fwds[0] = False
+            if rids[1] in cur_node.producing_rids:
+                fwds[1] = False
+
+            for idx in range(2):
+                if rids[idx] in cur_node.consuming_rids:
+                    stoics[idx] = cur_node.consuming_rids[rids[idx]]['stoic']
+                    nbs[idx] = self.get_neighbor_node(self.ng.edges[rids[idx]].products)
+                else:
+                    stoics[idx] = cur_node.producing_rids[rids[idx]]['stoic']
+                    nbs[idx] = self.get_neighbor_node(self.ng.edges[rids[idx]].reactants)
 
-            if rid in cur_node.consuming_rids:
-                stoic = cur_node.consuming_rids[rid]['stoic']
-                nb = self.get_neighbor_node(self.ng.edges[rid].products)
-                consuming_leg = ReactionLeg(rid, stoic, nb, True)
+            # preferably the producing leg connects to the network (nb = None)
+            if nbs[0] is None:
+                producing_leg = ReactionLeg(rids[0], stoics[0], nbs[0], fwds[0])
+                consuming_leg = ReactionLeg(rids[1], stoics[1], nbs[1], fwds[1])
             else:
-                stoic = cur_node.producing_rids[rid]['stoic']
-                nb = self.get_neighbor_node(self.ng.edges[rid].reactants)
-                if rev is False:
-                    producing_leg = ReactionLeg(rid, stoic, nb, True)
-                else:
-                    # in case of a reversible producing reaction, configure consuming path
-                    consuming_leg = ReactionLeg(rid, stoic, nb, False)
-
-        if cur_node.connectivity == 2:
-            if rev is True:
-                # process a node with reversible fluxes on both sides
-                #  one being considered as producing, one as consuming reaction
-                nbs = [None, None]
-                stoics = [0.0, 0.0]
-                fwds = [True, True]
-
-                # arbitrarily select first reaction as producing and second as consuming
-                if rids[0] in cur_node.consuming_rids:
-                    fwds[0] = False
-                if rids[1] in cur_node.producing_rids:
-                    fwds[1] = False
-
-                for idx in range(2):
-                    if rids[idx] in cur_node.consuming_rids:
-                        stoics[idx] = cur_node.consuming_rids[rids[idx]]['stoic']
-                        nbs[idx] = self.get_neighbor_node(self.ng.edges[rids[idx]].products)
-                    else:
-                        stoics[idx] = cur_node.producing_rids[rids[idx]]['stoic']
-                        nbs[idx] = self.get_neighbor_node(self.ng.edges[rids[idx]].reactants)
+                producing_leg = ReactionLeg(rids[1], stoics[1], nbs[1], not fwds[1])
+                consuming_leg = ReactionLeg(rids[0], stoics[0], nbs[0], not fwds[0])
 
-                # preferably the producing leg connects to the network (nb = None)
-                if nbs[0] is None:
-                    producing_leg = ReactionLeg(rids[0], stoics[0], nbs[0], fwds[0])
-                    consuming_leg = ReactionLeg(rids[1], stoics[1], nbs[1], fwds[1])
+        # get neighbors of a node in a directional pathway segment
+        else:
+            # identify one directional reaction connecting the node
+            if revs[0] is False:
+                drid, orid = rids
+            else:
+                orid, drid = rids
+
+            # check if the selected directional reaction is a consuming reaction
+            #   the other reaction (directional or reversible) must then be the producing reaction
+            if drid in cur_node.consuming_rids:
+                c_rid = drid
+                p_rid = orid
+                c_stoic = cur_node.consuming_rids[c_rid]['stoic']
+                nb_ds = self.get_neighbor_node(self.ng.edges[c_rid].products)
+                c_fwd = True
+
+                # the 'producing' reaction can be a producing or a reversible consuming reaction
+                if p_rid in cur_node.producing_rids:
+                    p_stoic = cur_node.producing_rids[p_rid]['stoic']
+                    nb_us = self.get_neighbor_node(self.ng.edges[p_rid].reactants)
+                    p_fwd = True
                 else:
-                    producing_leg = ReactionLeg(rids[1], stoics[1], nbs[1], not fwds[1])
-                    consuming_leg = ReactionLeg(rids[0], stoics[0], nbs[0], not fwds[0])
+                    # case of a consuming reaction run in reverse
+                    assert cur_node.consuming_rids[p_rid]['rev'] is True
+                    p_stoic = cur_node.consuming_rids[p_rid]['stoic']
+                    nb_us = self.get_neighbor_node(self.ng.edges[p_rid].products)
+                    p_fwd = False
 
-            # get neighbors of a node in a directional pathway segment
+            # case, where the selected directional reaction is a producing reaction
             else:
-                # identify one directional reaction connecting the node
-                if revs[0] is False:
-                    drid, orid = rids
-                else:
-                    orid, drid = rids
+                p_rid = drid
+                c_rid = orid
+                p_stoic = cur_node.producing_rids[p_rid]['stoic']
+                nb_us = self.get_neighbor_node(self.ng.edges[p_rid].reactants)
+                p_fwd = True
 
-                # check if the selected directional reaction is a consuming reaction
-                #   the other reaction (directional or reversible) must then be the producing reaction
-                if drid in cur_node.consuming_rids:
-                    c_rid = drid
-                    p_rid = orid
+                if c_rid in cur_node.consuming_rids:
                     c_stoic = cur_node.consuming_rids[c_rid]['stoic']
                     nb_ds = self.get_neighbor_node(self.ng.edges[c_rid].products)
                     c_fwd = True
-
-                    # the 'producing' reaction can be a producing or a reversible consuming reaction
-                    if p_rid in cur_node.producing_rids:
-                        p_stoic = cur_node.producing_rids[p_rid]['stoic']
-                        nb_us = self.get_neighbor_node(self.ng.edges[p_rid].reactants)
-                        p_fwd = True
-                    else:
-                        # case of a consuming reaction run in reverse
-                        assert cur_node.consuming_rids[p_rid]['rev'] is True
-                        p_stoic = cur_node.consuming_rids[p_rid]['stoic']
-                        nb_us = self.get_neighbor_node(self.ng.edges[p_rid].products)
-                        p_fwd = False
-
-                # case, where the selected directional reaction is a producing reaction
                 else:
-                    p_rid = drid
-                    c_rid = orid
-                    p_stoic = cur_node.producing_rids[p_rid]['stoic']
-                    nb_us = self.get_neighbor_node(self.ng.edges[p_rid].reactants)
-                    p_fwd = True
+                    assert cur_node.producing_rids[c_rid]['rev'] is True
+                    c_stoic = cur_node.producing_rids[c_rid]['stoic']
+                    nb_ds = self.get_neighbor_node(self.ng.edges[c_rid].reactants)
+                    c_fwd = False
 
-                    if c_rid in cur_node.consuming_rids:
-                        c_stoic = cur_node.consuming_rids[c_rid]['stoic']
-                        nb_ds = self.get_neighbor_node(self.ng.edges[c_rid].products)
-                        c_fwd = True
-                    else:
-                        assert cur_node.producing_rids[c_rid]['rev'] is True
-                        c_stoic = cur_node.producing_rids[c_rid]['stoic']
-                        nb_ds = self.get_neighbor_node(self.ng.edges[c_rid].reactants)
-                        c_fwd = False
-
-                producing_leg = ReactionLeg(p_rid, p_stoic, nb_us, p_fwd)
-                consuming_leg = ReactionLeg(c_rid, c_stoic, nb_ds, c_fwd)
+            producing_leg = ReactionLeg(p_rid, p_stoic, nb_us, p_fwd)
+            consuming_leg = ReactionLeg(c_rid, c_stoic, nb_ds, c_fwd)
 
         return {'sid': init_sid, 'producing': producing_leg, 'consuming': consuming_leg, 'reversible': rev}
 
     def get_ds_neighbor(self, us_pwy_seg):
         """get downstream neighbor at us_sid coming from p_rid.
 
-        Only for c1 and c2 nodes
-
-        c1 node: this is a terminal node (blocked node),
-          as only exising reaction is consumed by p_rid
+        downstream is either a consuming reaction or
+        a reversible producing reaction
+        downstream node can be another c1/c2 node, or the network connection
+        reversible flag is set according to flow through this node
 
-        c2 node: downstream is either a consuming reaction or
-          a reversible producing reaction
-          downstream node can be another c1/c2 node, or the network connection
-          reversible flag is set according to flow through this node
-
-          special case: remaining reaction is a directional producing reaction,
-          (while an actual consuming reaction is required). There is a problem, unless
-          the upstream pathway is reversible (leading to a directional pathway
-          in opposite direction)
+        special case: remaining reaction is a directional producing reaction,
+        (while an actual consuming reaction is required). There is a problem, unless
+        the upstream pathway is reversible (leading to a directional pathway
+        in opposite direction)
         """
         sid = us_pwy_seg.consuming.nb
         cur_node = self.ng.nodes[sid]
+        assert cur_node.connectivity == 2
+
         revs = [val['rev'] for val in (cur_node.consuming_rids | cur_node.producing_rids).values()]
         rev = bool(np.all(np.array(revs)))
-        producing_leg = None
-        consuming_leg = None
 
         # collect information for upstream leg
         p_rid = us_pwy_seg.consuming.rid
@@ -343,28 +422,24 @@ class LinearPathway:
             p_stoic = cur_node.consuming_rids[p_rid]['stoic']
             p_fwd = False
 
-        if cur_node.connectivity == 1:
-            producing_leg = ReactionLeg(p_rid, p_stoic, us_nb, p_fwd)
+        c_rid = [rid for rid in (cur_node.consuming_rids | cur_node.producing_rids) if rid != p_rid][0]
+        if c_rid in cur_node.consuming_rids:
+            c_stoic = cur_node.consuming_rids[c_rid]['stoic']
+            ds_nb = self.get_neighbor_node(self.ng.edges[c_rid].products)
+            c_fwd = True
+        else:
+            # a 'producing' reaction consuming the metabolite
+            c_stoic = cur_node.producing_rids[c_rid]['stoic']
+            ds_nb = self.get_neighbor_node(self.ng.edges[c_rid].reactants)
+            c_fwd = False
 
-        if cur_node.connectivity == 2:
-            c_rid = [rid for rid in (cur_node.consuming_rids | cur_node.producing_rids) if rid != p_rid][0]
-            if c_rid in cur_node.consuming_rids:
-                c_stoic = cur_node.consuming_rids[c_rid]['stoic']
-                ds_nb = self.get_neighbor_node(self.ng.edges[c_rid].products)
-                c_fwd = True
-            else:
-                # a 'producing' reaction consuming the metabolite
-                c_stoic = cur_node.producing_rids[c_rid]['stoic']
-                ds_nb = self.get_neighbor_node(self.ng.edges[c_rid].reactants)
-                c_fwd = False
-
-            # check for reversal in case the producing reaction is directional
-            if (c_rid in cur_node.consuming_rids) or (cur_node.producing_rids[c_rid]['rev'] is True):
-                producing_leg = ReactionLeg(p_rid, p_stoic, us_nb, p_fwd)
-                consuming_leg = ReactionLeg(c_rid, c_stoic, ds_nb, c_fwd)
-            else:
-                producing_leg = ReactionLeg(c_rid, c_stoic, ds_nb, not c_fwd)
-                consuming_leg = ReactionLeg(p_rid, p_stoic, us_nb, not p_fwd)
+        # check for reversal in case the producing reaction is directional
+        if (c_rid in cur_node.consuming_rids) or (cur_node.producing_rids[c_rid]['rev'] is True):
+            producing_leg = ReactionLeg(p_rid, p_stoic, us_nb, p_fwd)
+            consuming_leg = ReactionLeg(c_rid, c_stoic, ds_nb, c_fwd)
+        else:
+            producing_leg = ReactionLeg(c_rid, c_stoic, ds_nb, not c_fwd)
+            consuming_leg = ReactionLeg(p_rid, p_stoic, us_nb, not p_fwd)
 
         return {'sid': sid, 'producing': producing_leg, 'consuming': consuming_leg, 'reversible': rev}
 
@@ -372,10 +447,10 @@ class LinearPathway:
 
         sid = ds_pwy_seg.producing.nb
         cur_node = self.ng.nodes[sid]
+        assert cur_node.connectivity == 2
+
         revs = [val['rev'] for val in (cur_node.consuming_rids | cur_node.producing_rids).values()]
         rev = bool(np.all(np.array(revs)))
-        producing_leg = None
-        consuming_leg = None
 
         # collect information for downstream leg
         c_rid = ds_pwy_seg.producing.rid
@@ -388,33 +463,22 @@ class LinearPathway:
             c_stoic = cur_node.producing_rids[c_rid]['stoic']
             c_fwd = False
 
-        if cur_node.connectivity == 1:
-            consuming_leg = ReactionLeg(c_rid, c_stoic, ds_nb, c_fwd)
+        p_rid = [rid for rid in (cur_node.consuming_rids | cur_node.producing_rids) if rid != c_rid][0]
+        if p_rid in cur_node.producing_rids:
+            p_stoic = cur_node.producing_rids[p_rid]['stoic']
+            us_nb = self.get_neighbor_node(self.ng.edges[p_rid].reactants)
+            p_fwd = True
+        else:
+            p_stoic = cur_node.consuming_rids[p_rid]['stoic']
+            us_nb = self.get_neighbor_node(self.ng.edges[p_rid].products)
+            p_fwd = False
 
-        if cur_node.connectivity == 2:
-            p_rid = [rid for rid in (cur_node.consuming_rids | cur_node.producing_rids) if rid != c_rid][0]
-            if p_rid in cur_node.producing_rids:
-                p_stoic = cur_node.producing_rids[p_rid]['stoic']
-                us_nb = self.get_neighbor_node(self.ng.edges[p_rid].reactants)
-                p_fwd = True
-            else:
-                p_stoic = cur_node.consuming_rids[p_rid]['stoic']
-                us_nb = self.get_neighbor_node(self.ng.edges[p_rid].products)
-                p_fwd = False
-
-            # check for reversal in case the producing reaction is directional
-            if (p_rid in cur_node.producing_rids) or (cur_node.consuming_rids[p_rid]['rev'] is True):
-                producing_leg = ReactionLeg(p_rid, p_stoic, us_nb, p_fwd)
-                consuming_leg = ReactionLeg(c_rid, c_stoic, ds_nb, c_fwd)
-            else:
-                producing_leg = ReactionLeg(c_rid, c_stoic, ds_nb, not c_fwd)
-                consuming_leg = ReactionLeg(p_rid, p_stoic, us_nb, not p_fwd)
+        # check for reversal in case the producing reaction is directional
+        if (p_rid in cur_node.producing_rids) or (cur_node.consuming_rids[p_rid]['rev'] is True):
+            producing_leg = ReactionLeg(p_rid, p_stoic, us_nb, p_fwd)
+            consuming_leg = ReactionLeg(c_rid, c_stoic, ds_nb, c_fwd)
+        else:
+            producing_leg = ReactionLeg(c_rid, c_stoic, ds_nb, not c_fwd)
+            consuming_leg = ReactionLeg(p_rid, p_stoic, us_nb, not p_fwd)
 
         return {'sid': sid, 'producing': producing_leg, 'consuming': consuming_leg, 'reversible': rev}
-
-    def get_reactions(self):
-
-        rids = set()
-        for pwy_seg in self.segments:
-            rids |= pwy_seg.get_reactions()
-        return rids
diff --git a/modelpruner/graph/linear_pathways.py b/modelpruner/graph/linear_pathways.py
index d488269..77be0d3 100644
--- a/modelpruner/graph/linear_pathways.py
+++ b/modelpruner/graph/linear_pathways.py
@@ -8,57 +8,64 @@ from modelpruner.graph.linear_pathway import LinearPathway
 
 class LinearPathways:
 
-    # TODO: rework again to first get lin_pathways from each of the nodes (excluding biomass precursors)
-    #  idea to get as long pathways as possible
-    #  sort for longest pathways
-    #  remove duplicated pathways
-    #  accept longest pathways first and drop pathways that contain already consumed reactions
-    #  check if c2_nodes remain, and for these request again pathways
+    def __init__(self, network_graph):
+        """Initialize LinearPathways.
 
-    def __init__(self, network_graph, protected_rids=None):
+        LinearPathways contain c2 metaboltes (metabolites with connectivity of 2).
+        c2 metabolites can be protected using protect_reactions() method,
+         e.g. protect c2 metabolites connected to biomass function
+              protect c2 metabolites connected ot exchange functions
+
+        :param network_graph: network graph for fba model
+        :type network_graph: NetworkGraph
         """
+        self.ng = network_graph
+        self.c2_sids = {node.sid: None for node in self.ng.nodes.values() if node.is_c2_node()}
+        self.pwy_rids = {}
+        self.pathways = []
+        self.inconsistant_pathways = []
 
-        Construct linear pathways from metabolites with a connectivity of 2
+    def protect_reactions(self, protected_rids):
+        """Protect metabolites connected to selected reactions.
 
-        C1 metabolites (with connectivity of 1) are also included
-        Each such C1/C2 metabolite will only be consumed in once pathway
+        :param protected_rids: list of reaction ids to be protected
+        :type protected_rids: list of str
         """
+        protected_sids = set()
+        for rid in protected_rids:
+            if rid in self.ng.edges:
+                protected_sids |= set(self.ng.edges[rid].reactants.keys())
+                protected_sids |= set(self.ng.edges[rid].products.keys())
+        for sid in protected_sids:
+            if sid in self.c2_sids:
+                del self.c2_sids[sid]
 
-        self.ng = network_graph
+    def construct(self):
+        """Construct linear pathways for the network.
 
-        # TODO: biomass protected, external metabolites protected
-        # metabolites connected to protected reactions get protected
-        protected_sids = set()
-        if protected_rids is not None:
-            for rid in protected_rids:
-                if rid in self.ng.edges:
-                    protected_sids |= set(self.ng.edges[rid].reactants.keys())
-                    protected_sids |= set(self.ng.edges[rid].products.keys())
+        We iterate through all c2 metabolites. At each step we select one c2 metabolite
+        that has not already been consumed on one of the linear pathways. From this
+        c2 metabolite we construct a complete linear pathways, including branches that
+        consist of c2 metabolites.
 
-        self.c2_sids = {node.sid: None for node in self.ng.nodes.values()
-                        if node.sid not in protected_sids and node.is_c2_node()}
-        self.pwy_rids = {}
-        self.pathways = []
+        We update references in c2_sids and pwy_rids to indicate in which pathway
+        respective metabolites and reactions are contained.
 
+        :return: number of linear pathways
+        :rtype: integer
+        """
         pwy_idx = 0
         for sid in sorted(self.c2_sids):
             if self.c2_sids[sid] is None:
                 balance_c2_sids = {sid for sid, idx in self.c2_sids.items() if idx is None}
-                lin_pwy = LinearPathway(sid, balance_c2_sids, network_graph)
-                self.pathways.append(lin_pwy)
-
-                for subs_sid in self.pwy_substrates(lin_pwy):
-                    if (subs_sid in self.c2_sids) and (self.c2_sids[subs_sid] is None):
+                lin_pwy = LinearPathway(sid, balance_c2_sids, self.ng)
+                if lin_pwy.inconsistant is False:
+                    self.pathways.append(lin_pwy)
+                    for subs_sid in lin_pwy.c2_sids_pwy:
                         self.c2_sids[subs_sid] = pwy_idx
-                for rid in lin_pwy.get_reactions():
-                    self.pwy_rids[rid] = pwy_idx
+                    for rid in lin_pwy.rids_scale:
+                        self.pwy_rids[rid] = pwy_idx
+                else:
+                    self.inconsistant_pathways.append(lin_pwy)
                 pwy_idx += 1
-
-    def pwy_substrates(self, lin_pwy):
-        sids = set()
-        for pwy_seg in lin_pwy.segments:
-            sids |= set(self.ng.edges[pwy_seg.producing.rid].reactants.keys())
-            sids |= set(self.ng.edges[pwy_seg.producing.rid].products.keys())
-            sids |= set(self.ng.edges[pwy_seg.consuming.rid].reactants.keys())
-            sids |= set(self.ng.edges[pwy_seg.consuming.rid].products.keys())
-        return sids
+        return pwy_idx
diff --git a/modelpruner/graph/metabolite_node.py b/modelpruner/graph/metabolite_node.py
index df0e704..6e98ec4 100644
--- a/modelpruner/graph/metabolite_node.py
+++ b/modelpruner/graph/metabolite_node.py
@@ -32,7 +32,5 @@ class MetaboliteNode:
             if len(self.consuming_rids) == 2:
                 if bool(np.any(np.array([val['rev'] for val in self.consuming_rids.values()]))) is True:
                     return True
-        elif self.connectivity == 1:  # blocked metabolite
-            return True
         else:
             return False
-- 
GitLab