From 4c86e5d743a2c08add36650e959dabc00395a0fd Mon Sep 17 00:00:00 2001 From: Peter Schubert <Peter.Schubert@hhu.de> Date: Fri, 18 Nov 2022 15:09:32 +0100 Subject: [PATCH] Considering linear pathways during pruning --- modelpruner/_version.py | 2 +- modelpruner/core/model_pruner.py | 228 ++++++++++++++++++++------- modelpruner/graph/linear_pathway.py | 4 +- modelpruner/graph/linear_pathways.py | 65 ++++++-- modelpruner/graph/network_graph.py | 15 ++ modelpruner/models/fba_model.py | 29 +--- 6 files changed, 249 insertions(+), 94 deletions(-) diff --git a/modelpruner/_version.py b/modelpruner/_version.py index dc8bf37..bb7db76 100644 --- a/modelpruner/_version.py +++ b/modelpruner/_version.py @@ -1,5 +1,5 @@ """ Definition of version string. """ -__version__ = "0.3.0" +__version__ = "0.3.1" program_name = 'modelpruner' diff --git a/modelpruner/core/model_pruner.py b/modelpruner/core/model_pruner.py index 63c89d0..b32e1db 100644 --- a/modelpruner/core/model_pruner.py +++ b/modelpruner/core/model_pruner.py @@ -34,6 +34,8 @@ import json from modelpruner.models.fba_model import FbaModel from modelpruner.core.protected_parts import ProtectedParts +from modelpruner.graph.network_graph import NetworkGraph +from modelpruner.graph.linear_pathways import LinearPathways global _fba_model global _pfs @@ -90,7 +92,7 @@ def _worker(rids): class ModelPruner: def __init__(self, sbml_fname, protected_parts, resume=False, processes=100): - """Init + """Initialize ModelPruner :param sbml_fname: pathname of original model (ending with '.xml') :type sbml_fname: str @@ -105,6 +107,8 @@ class ModelPruner: self.cpus = min(processes, os.cpu_count()) self.min_mp_total_flux = 1000 self.full_sbml_fname = sbml_fname + self.ng = None + self.lpwys = None self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname) self._snapshot_json = re.sub(r'.xml$', '_snapshot.json', sbml_fname) @@ -116,8 +120,8 @@ class ModelPruner: self.pps = ProtectedParts(protected_parts) self.protected_sids = self.pps.protected_sids - - self.protected_rids = self.get_total_protected_rids() + self.protected_rids = self.pps.protected_rids + # self.protected_rids = self.get_total_protected_rids() self.essential_rids = set() # if resume option selected and snapshot of essential rids exists, use them @@ -138,10 +142,11 @@ class ModelPruner: notes = self.pps.protected_functions[idx].notes print(f' protected function {idx}, optimum: {val:.04f} ({notes})') - print('Check consistency of protected parts') + print('Check consistency of protected parts:') blocked_p_rids = self.check_protected_parts() self.active_protected_rids = self.protected_rids.difference(blocked_p_rids) + # TODO: can be removed once pruning validated def get_total_protected_rids(self): """Determine protected reactions, incl. reactions connecting protected metabolites. @@ -225,7 +230,10 @@ class ModelPruner: if len(pf_unprotected_rids) > 0: print(' Reactions in protected functions that are not protected:', pf_unprotected_rids) - blocked_rids = self.reaction_types_fva(self.fba_model.rids)['blocked_rids'] + self.set_linear_pathways() + compressed_rids = self.compress_pwy_rids(self.fba_model.rids) + reaction_types = self.reaction_types_fva(compressed_rids) + blocked_rids = self.uncompress_pwy_rids(reaction_types['blocked_rids']) blocked_p_rids = self.protected_rids.intersection(blocked_rids) blocked_pf_rids = pf_rids.intersection(blocked_rids) if len(blocked_p_rids) > 0: @@ -237,29 +245,30 @@ class ModelPruner: def identify_parallel_reactions(self): """Identify parallel reactions having same stoichiometry (considering reverse). - protected reaction are preserved. - reversible reactions have higher priority + From all reactions in a group of parallel reactions, try to preserve one reaction + in the model + protected reactions or reactions in protected linear pathways are preserved + in any case. + if there exists a reversible reaction, we try to preserve it :return: reaction ids of parallel reactions :rtype: set of str """ - drop_rids = set() - for group_rids in self.fba_model.get_parallel_rids(): - protected = set() + drop_parallel_rids = set() + for parallel_rids in self.fba_model.get_parallel_rids(): + drop_candidate_rids = set(parallel_rids) rid_rev = None - for rid in group_rids: - if rid in self.protected_rids: - protected.add(rid) - if rid in self.fba_model.rids_rev: + for rid in parallel_rids: + if (rid in self.protected_rids) or (self.lpwys.get_pathway_from_rid(rid)['protected'] is True): + drop_candidate_rids.remove(rid) + elif rid in self.fba_model.rids_rev: rid_rev = rid - if len(protected) > 0: - drop_group_rids = set(group_rids) - protected - elif rid_rev is not None: - drop_group_rids = set(group_rids) - {rid_rev} - else: - drop_group_rids = set(group_rids[1:]) - drop_rids |= drop_group_rids - return drop_rids + if len(drop_candidate_rids) > 1 and rid_rev is not None: + drop_candidate_rids.remove(rid_rev) + elif len(drop_candidate_rids) == len(parallel_rids): + drop_candidate_rids.pop() + drop_parallel_rids |= drop_candidate_rids + return drop_parallel_rids def is_protected_rids_blocked(self, candidate_rid): """check if active protected reactions get blocked by removal of specified reaction. @@ -296,14 +305,13 @@ class ModelPruner: :rtype: dict """ # print(time.strftime("%H:%M:%S", time.localtime())) - n_pf = len(self.pps.protected_functions) flux_min_pfs = np.zeros((n_pf, len(free_rids))) flux_max_pfs = np.zeros((n_pf, len(free_rids))) res = {} # setting up multiprocessing takes quite some resources - # there is a tradeoff wrt running on one processor + # there is a tradeoff wrt running on a single processor processes = min(self.cpus, int(len(free_rids)*n_pf/self.min_mp_total_flux)) # processes = min(self.cpus, len(free_rids)) @@ -327,7 +335,6 @@ class ModelPruner: flux_max_pfs[idx] = res['max'] else: print(f'FBA for condition {pf.obj_id} is not feasible') - # TODO: can this happen? how best to continue? flux_min_pfs[idx] = np.nan flux_max_pfs[idx] = np.nan break @@ -387,12 +394,112 @@ class ModelPruner: 'candidate_rids_sorted': candidate_rids_sorted} return reaction_types + def set_linear_pathways(self, protected_rids=None): + """Retrieve and set linear pathways from fba model. + + Construct from current fba model a network graph and linear pathways + Exclude specific metabolites from participating in linear pathways. + Such specific metabolites are substrates consumed/produced by + specific reactions (protected reactions) + Default in case no protected reactions are supplied is excluding + all metabolites used in the fba objective functions of the fba model + (e.g. the Biomass functions) + Note: This blocks constructing linear pathways connected through + the biomass functions + + :param protected_rids: list of reaction ids from whith to retrieve substrates + :type protected_rids: None (= fba biomass reactions) or list/set of str, default: None + """ + ng = NetworkGraph(self.fba_model) + self.lpwys = LinearPathways(ng) + if protected_rids is None: + protected_rids = self.fba_model.biomass_rids + exclude_sids = ng.get_substrates(protected_rids) + self.lpwys.exclude_sids(exclude_sids) + self.lpwys.construct() + + def compress_pwy_rids(self, uncompressed_rids): + """Collaps pathway reactions ids. + + From list of reactions drop correlated reactions due to + working in same linear pathway + + :param uncompressed_rids: list of reaction ids in the model + :type uncompressed_rids: list or set of str + :return: reactions ids (considering only one per linear pathway) + :rtype: list of str + """ + retained_rids = [] + consumed_pwy_idxs = set() + for rid in uncompressed_rids: + if rid not in self.lpwys.rids2pwy: + retained_rids.append(rid) + elif self.lpwys.rids2pwy[rid] not in consumed_pwy_idxs: + retained_rids.append(rid) + consumed_pwy_idxs.add(self.lpwys.rids2pwy[rid]) + return retained_rids + + def uncompress_pwy_rids(self, compressed_rids): + """Uncompress reaction ids along linear pathways. + + :param compressed_rids: list of reaction ids + :type compressed_rids: list or set of str + :return: reactions ids expanded across linear pathways + :rtype: list of str + """ + uncompressed_rids = [] + for rid in compressed_rids: + if rid in self.lpwys.rids2pwy: + pwy_idx = self.lpwys.rids2pwy[rid] + uncompressed_rids.extend(self.lpwys.pwy2rids[pwy_idx]) + else: + uncompressed_rids.append(rid) + return uncompressed_rids + + def rids_sort_by_pwy(self, candidate_rids): + """Sort reactions ids considering linear pathways. + + In pruning, candidate reactions for deletion can be part of + linear pathways. + If candidate reactions is part of a protected pathway it will be considered essential + If candidate is part of an unprotected pathway, all reactions within same pathway + will be treated jointly + If candidates not part of pathways, will be at the end of sorted list + + :param candidate_rids: list of reaction ids + :type candidate_rids: list or set of str + :return: reactions ids sorted considering pathway information + :rtype: list of str + """ + network_rids = [] + protected_pwy_rids = [] + unprotected_pwy_rids = [] + unprotected_pwy_rids_pwylen = {} + for rid in candidate_rids: + if rid in self.lpwys.rids2pwy: + if self.lpwys.get_pathway_from_rid(rid)['protected'] is True: + protected_pwy_rids.append(rid) + else: + pwy_idx = self.lpwys.rids2pwy[rid] + pwy_len = len(self.lpwys.pwy2rids[pwy_idx]) + unprotected_pwy_rids_pwylen[rid] = pwy_len + else: + network_rids.append(rid) + + # rids part of unprotected pathways get sorted according to pathway length + pwy_lens_desc = sorted(list({pwy_len for pwy_len in unprotected_pwy_rids_pwylen.values()}), reverse=True) + for pwy_len in pwy_lens_desc: + for rid, p_len in unprotected_pwy_rids_pwylen.items(): + if p_len == pwy_len: + unprotected_pwy_rids.append(rid) + return protected_pwy_rids + unprotected_pwy_rids + network_rids + def prune(self, snapshot_freq=0): """Prune the metabolic network - Once the Metabolic network has been loaded it can be pruned. - With snapshot_freq one can deterimine if (snapshot_freq > 0) and - how often intermediate results shall be store, from which one can resume in future + Once the metabolic network has been loaded it can be pruned. + With snapshot_freq one can determine if (snapshot_freq > 0) and + how often intermediate results shall be store, from which one can resume. Step during Pruning 1. identification and removal of parallel reactions @@ -410,8 +517,9 @@ class ModelPruner: print(f'Initial S-matrix: {self.fba_model.s_mat_coo.shape}; ' f' protected reactions: {len(self.protected_rids)}:', self.protected_rids) - # remove any parallel reactions from the model - drop_rids = self.identify_parallel_reactions().difference(self.protected_rids) + # remove parallel reactions (i.e. from reactions with same stoichiometry, keep only one) + self.set_linear_pathways() + drop_rids = self.identify_parallel_reactions() if len(drop_rids) > 0: self.fba_model.remove_reactions(drop_rids) print(f'{len(drop_rids)} parallel reaction(s) dropped:', drop_rids) @@ -426,41 +534,53 @@ class ModelPruner: # main iterration loop to retain and remove reactions from the fba model retained_rids = self.protected_rids | self.essential_rids while len(retained_rids) < self.fba_model.s_mat_coo.shape[1]: + self.set_linear_pathways() free_rids = list(set(self.fba_model.rids).difference(retained_rids)) - reaction_types = self.reaction_types_fva(free_rids) + free_compressed_rids = self.compress_pwy_rids(free_rids) + reaction_types = self.reaction_types_fva(free_compressed_rids) print() - new_essential_rids = reaction_types['essential_rids'] - blocked_rids = reaction_types['blocked_rids'] + new_essential_rids = self.uncompress_pwy_rids(reaction_types['essential_rids']) + blocked_rids = self.uncompress_pwy_rids(reaction_types['blocked_rids']) candidate_rids = reaction_types['candidate_rids_sorted'] - if len(blocked_rids) > 0: - self.fba_model.remove_reactions(blocked_rids) - print(f"{len(blocked_rids)} blocked reaction(s) removed:", blocked_rids) + self.lpwys.set_protected_pathways(retained_rids, self.protected_sids) + # unprotected blocked reactions get removed from the model + unprotected_blocked_rids = [] + for rid in blocked_rids: + if self.lpwys.get_pathway_from_rid(rid)['protected'] is False: + unprotected_blocked_rids.append(rid) + else: + new_essential_rids.append(rid) + + if len(unprotected_blocked_rids) > 0: + self.fba_model.remove_reactions(unprotected_blocked_rids) + print(f"{len(unprotected_blocked_rids)} blocked reaction(s) removed:", unprotected_blocked_rids) - print(f"{len(candidate_rids)} candidates:", candidate_rids[:4], '...') + candidate_rids_sorted_by_pwy = self.rids_sort_by_pwy(candidate_rids) + print(f"{len(candidate_rids_sorted_by_pwy)} candidates:", candidate_rids_sorted_by_pwy[:4], '...') # try to remove one reaction while still satisfying all protected functions - for candidate_rid in candidate_rids: - # print('Check feasibility of', candidate_rid) - feasible = self.fba_model.check_pfs_feasibility(self.pps.protected_functions, - candidate_rid=candidate_rid) - if feasible is False: - new_essential_rids.add(candidate_rid) + for candidate_rid in candidate_rids_sorted_by_pwy: + + # if candidate is in a protected linear pathway, it is considered essential + if self.lpwys.get_pathway_from_rid(candidate_rid)['protected'] is True: + new_essential_rids.extend(self.uncompress_pwy_rids([candidate_rid])) continue + # if candidate not in a protected pathway, check if removal still satisfies protected functions else: - # check that active_protected_rids are not getting blocked - if len(self.active_protected_rids) > 0: - is_blocked = self.is_protected_rids_blocked(candidate_rid) - if is_blocked is True: - new_essential_rids.add(candidate_rid) - continue - self.fba_model.remove_reactions({candidate_rid}) - print(f' {candidate_rid} removed from the model') - break + feasible = self.fba_model.check_pfs_feasibility(self.pps.protected_functions, + candidate_rid=candidate_rid) + if feasible is False: + new_essential_rids.extend(self.uncompress_pwy_rids([candidate_rid])) + continue + else: + self.fba_model.remove_reactions(self.uncompress_pwy_rids([candidate_rid])) + print(f' {self.uncompress_pwy_rids([candidate_rid])} removed from the model') + break if len(new_essential_rids) > 0: - self.essential_rids |= new_essential_rids - retained_rids |= new_essential_rids + self.essential_rids |= set(new_essential_rids) + retained_rids |= set(new_essential_rids) print(f"{len(new_essential_rids)} reaction(s) added to essential reactions:", new_essential_rids) diff --git a/modelpruner/graph/linear_pathway.py b/modelpruner/graph/linear_pathway.py index baa43aa..8d539af 100644 --- a/modelpruner/graph/linear_pathway.py +++ b/modelpruner/graph/linear_pathway.py @@ -224,7 +224,7 @@ class LinearPathway: segments.reverse() else: self.inconsistant = True - print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}') + # print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}') break upstream_seg = segments[-1] return segments @@ -269,7 +269,7 @@ class LinearPathway: segments.reverse() else: self.inconsistant = True - print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}') + # print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}') break downstream_seg = segments[0] return segments diff --git a/modelpruner/graph/linear_pathways.py b/modelpruner/graph/linear_pathways.py index 77be0d3..aaf589a 100644 --- a/modelpruner/graph/linear_pathways.py +++ b/modelpruner/graph/linear_pathways.py @@ -21,25 +21,55 @@ class LinearPathways: """ self.ng = network_graph self.c2_sids = {node.sid: None for node in self.ng.nodes.values() if node.is_c2_node()} - self.pwy_rids = {} + self.rids2pwy = {} + self.pwy2rids = {} self.pathways = [] self.inconsistant_pathways = [] + self.protected_pathways = set() - def protect_reactions(self, protected_rids): - """Protect metabolites connected to selected reactions. + def exclude_sids(self, exclude_sids): + """Exclude selected metabolites from linear pathways. - :param protected_rids: list of reaction ids to be protected - :type protected_rids: list of str + :param exclude_sids: metabolite ids to exclude from c2 sids + :type exclude_sids: list or set of str """ - protected_sids = set() - for rid in protected_rids: - if rid in self.ng.edges: - protected_sids |= set(self.ng.edges[rid].reactants.keys()) - protected_sids |= set(self.ng.edges[rid].products.keys()) - for sid in protected_sids: + for sid in exclude_sids: if sid in self.c2_sids: del self.c2_sids[sid] + def set_protected_pathways(self, p_rids, p_sids): + """Identify protected pathways. + + Protected pathways are linear pathways that contain + protected reactions of protected metabolites + + :param p_rids: reaction ids of protected reactions + :type p_rids: list of str + :param p_sids: metabolite ids of protected metabolites + :type p_sids: list of str + """ + for rid in p_rids: + if rid in self.rids2pwy: + self.protected_pathways.add(self.rids2pwy[rid]) + for sid in p_sids: + if sid in self.c2_sids: + self.protected_pathways.add(self.c2_sids[sid]) + return self.protected_pathways + + def get_pathway_from_rid(self, rid): + pwy_idx = self.rids2pwy.get(rid, None) + protected = False + if (pwy_idx is not None) and (pwy_idx in self.protected_pathways): + protected = True + return {'rid': rid, 'pwy_idx': pwy_idx, 'protected': protected} + + def get_pathway_from_sid(self, sid): + pwy_idx = self.c2_sids.get(sid, None) + protected = False + if (pwy_idx is not None) and (pwy_idx in self.protected_pathways): + protected = True + return {'sid': sid, 'pwy_idx': pwy_idx, 'protected': protected} + def construct(self): """Construct linear pathways for the network. @@ -48,7 +78,7 @@ class LinearPathways: c2 metabolite we construct a complete linear pathways, including branches that consist of c2 metabolites. - We update references in c2_sids and pwy_rids to indicate in which pathway + We update references in c2_sids and rids2pwy to indicate in which pathway respective metabolites and reactions are contained. :return: number of linear pathways @@ -64,8 +94,15 @@ class LinearPathways: for subs_sid in lin_pwy.c2_sids_pwy: self.c2_sids[subs_sid] = pwy_idx for rid in lin_pwy.rids_scale: - self.pwy_rids[rid] = pwy_idx + self.rids2pwy[rid] = pwy_idx + pwy_idx += 1 else: self.inconsistant_pathways.append(lin_pwy) - pwy_idx += 1 + + for rid, pwy_idx in self.rids2pwy.items(): + if pwy_idx not in self.pwy2rids: + self.pwy2rids[pwy_idx] = [rid] + else: + self.pwy2rids[pwy_idx].append(rid) + return pwy_idx diff --git a/modelpruner/graph/network_graph.py b/modelpruner/graph/network_graph.py index 323805e..0658caa 100644 --- a/modelpruner/graph/network_graph.py +++ b/modelpruner/graph/network_graph.py @@ -17,6 +17,21 @@ class NetworkGraph: for (rid, rev) in zip(fba_model.rids, fba_model.reversible)} self.connect_nodes(fba_model) + def get_substrates(self, rids): + """Retrieve all substrates of selected reactions. + + :param rids: reaction ids for which to retrieve reactants and products + :type rids: list of str + :return: metabolite ids of substrates + :rtype: set of str + """ + substrate_sids = set() + for rid in rids: + if rid in self.edges: + substrate_sids |= set(self.edges[rid].reactants.keys()) + substrate_sids |= set(self.edges[rid].products.keys()) + return substrate_sids + def connect_nodes(self, fba_model): for (row, col, stoic) in zip(fba_model.s_mat_coo.row, fba_model.s_mat_coo.col, diff --git a/modelpruner/models/fba_model.py b/modelpruner/models/fba_model.py index be86128..461ac8b 100644 --- a/modelpruner/models/fba_model.py +++ b/modelpruner/models/fba_model.py @@ -72,6 +72,11 @@ class FbaModel: self.lp = None self.scaling = 'AUTO' # check also '2N' + self.biomass_rids = set() + for idx, row in model_dict['fbcObjectives'].iterrows(): + for reac_ref in sbmlxdf.misc.record_generator(row['fluxObjectives']): + self.biomass_rids.add(sbmlxdf.misc.extract_params(reac_ref)['reac']) + def update_reversible_rids(self): self.rids_rev = self.rids[self.reversible] self.rid2idx_rev = {rid: idx + len(self.rids) for idx, rid in enumerate(self.rids_rev)} @@ -343,7 +348,7 @@ class FbaModel: """remove reactions and blocked metabolites from model and lp problem :param drop_rids: reaction ids to be removed from the model - :type drop_rids: set of str + :type drop_rids: list or set of str """ if self.lp is None: self.lp = self.create_core_fba_lp() @@ -403,28 +408,6 @@ class FbaModel: self.remove_lp_objective(pf.objective) return res - def check_pf_feasibility(self, pf, zero_flux_rid=None): - """check that current model satisfies the protected functions - - :param pf: protected function (objective) - :type pf: class ProtectedFunction - :param zero_flux_rid: reaction id of candiated checked for removal - :type zero_flux_rid: str, optional (default: None) - :return: status if protected function satisfied - :rtype: bool - """ - feasible = True - res = self.fba_pf_optimize(pf, zero_flux_rid) - if res['success'] is False: - feasible = False - elif pf.obj_dir == 'maximize': - if res['fun'] < pf.target_lb: - feasible = False - else: - if res['fun'] > pf.target_ub: - feasible = False - return feasible - def check_pfs_feasibility(self, pfs, candidate_rid=None): """check that current model satisfies all protected functions -- GitLab