diff --git a/modelpruner/core/model_pruner.py b/modelpruner/core/model_pruner.py index 9a841427e29870557ee59be3193d8e273aed195e..f2905faf88e898f66860d5d6c2fbf05594359601 100644 --- a/modelpruner/core/model_pruner.py +++ b/modelpruner/core/model_pruner.py @@ -1,4 +1,26 @@ - +"""Implementation of ModelPruner class. + +Prune SBML coded metabolic networks, i.e. reduce size/complexity by +removing reactions and metabolites while still protecing: +- specified phenotypes (e.g. growth on glucose within 90% of wild type) +- specified reactions +- specified metabolites (i.e. protecting reactions consuming/producing such + metabolites) +We basically remove redundant reactions (e.g. blocked reactions or reactions +in parallel pathways with corresponding metabolites) +- at each iteration step candidate reactions are identified with a flux range, + covering all protected functions, containing zero flux + One candidate is selected and removed, if it satisfied + - all protected functions + - does not block any of the protected functions + +based on Erdrich, Steuer, Klamt. (2015) An algorithm for the reduction of +genome-scale metabolic network models to meaningful core models. BMC Syst Bio +and corresponding MATLAB code implemented in CellNetAnalyzer +http://www.mpi-magdeburg.mpg.de/projects/cna/cna.html + +Peter Schubert, HHU Duesseldorf, September 2022 +""" import sys import os @@ -82,17 +104,20 @@ class ModelPruner: self.tolerance = 1e-12 # later accessible by set_params() ? self.cpus = min(processes, os.cpu_count()) self.min_mp_total_flux = 1000 - self.full_sbmnl_fname = sbml_fname + self.full_sbml_fname = sbml_fname self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname) self._snapshot_json = re.sub(r'.xml$', '_snapshot.json', sbml_fname) + # if resume option selected and snapshot model file exists, resume from there if resume is True and os.path.exists(self._snapshot_sbml): sbml_fname = self._snapshot_sbml self.fba_model = FbaModel(sbml_fname) self.pps = ProtectedParts(protected_parts) self.protected_sids = self.pps.protected_sids + # if resume option selected and snapshot protected rids exists, resume from there + # note: during pruning, essential rids are added to protected rids if resume is True and os.path.exists(self._snapshot_json): with open(self._snapshot_json, 'r') as f: snapshot_data = json.load(f) @@ -103,7 +128,7 @@ class ModelPruner: self.protected_rids = self.pps.protected_rids # self.protected_rids = self.get_total_protected_rids() - # overwrite the fba model bounds by general bound of protected parts (priot to LpProblem instantiation) + # overwrite fba model bounds by general bounds of protected parts (prior to LpProblem instantiation) self.fba_model.update_model_flux_bounds(self.pps.overwrite_bounds) self.fba_model.update_model_reversible(self.get_pp_reversible_reactions()) @@ -121,8 +146,7 @@ class ModelPruner: """ # identify reactions that need protection due to protected metabolites # over and above the protected rids provided in protected parts - sid_mask = np.array([True if sid in self.protected_sids else False - for sid in self.fba_model.sids]) + sid_mask = [self.fba_model.sid2idx[sid] for sid in self.protected_sids] psf_mask = self.fba_model.s_mat_coo.tocsr()[sid_mask].getnnz(axis=0) protected_sid_rids = set(self.fba_model.rids[psf_mask]) protected_rids = self.pps.protected_rids.union(protected_sid_rids) @@ -137,7 +161,7 @@ class ModelPruner: rev_pp_rids = set() for rid, bounds in self.pps.overwrite_bounds.items(): for idx in [0, 1]: - if math.isfinite(bounds[idx]): + if math.isfinite(bounds[idx]) and bounds[idx] < 0.0: rev_pp_rids.add(rid) for pf in self.pps.protected_functions.values(): for rid, bounds in pf.overwrite_bounds.items(): @@ -447,7 +471,7 @@ class ModelPruner: """ n_s, n_r = self.fba_model.s_mat_coo.shape if pruned_sbml is None: - pruned_sbml = re.sub(r'.xml$', f'_pruned_{n_s}x{n_r}.xml', self.full_sbmnl_fname) + pruned_sbml = re.sub(r'.xml$', f'_pruned_{n_s}x{n_r}.xml', self.full_sbml_fname) # overwrite model objective with objective of first protected function first_pf = list(self.pps.protected_functions.values())[0] diff --git a/modelpruner/models/fba_model.py b/modelpruner/models/fba_model.py index 9021ab5a226747d5347976949875a5287b3db58c..5dfbbf3e03aa2d0cd2c781d36b6e08fd5ffa0d88 100644 --- a/modelpruner/models/fba_model.py +++ b/modelpruner/models/fba_model.py @@ -1,3 +1,21 @@ +"""Implementation of FbaModel class. + +used to manage the FBA model used in ModelPruner +Model is created from SBML coded model file. + +For performance reasons, the stoichiometric matrix is stored in +a sparce matrix data structure. +- speedup for data exchange with GlpkLinearProblem +- speedup for initiating multiprocessing (FVA) + +reversible reactions are treated as two irreversible reactions +- this avoids high flux values during FBA + +The FBA model gets reduced along the pruning by removing reactions +and blocked metabolites. + +Peter Schubert, HHU Duesseldorf, September 2022 +""" import os @@ -36,10 +54,12 @@ class FbaModel: self.flux_bounds = np.vstack((df_reactions['fbcLb'].to_numpy(), df_reactions['fbcUb'].to_numpy())) self.reversible = df_reactions['reversible'].to_numpy().copy() - for idx in range(len(self.reversible)): - if self.flux_bounds[0, idx] < 0 and np.logical_not(self.reversible[idx]): - self.reversible[idx] = True - print(f'reaction {idx} set to reversible') + mask = np.logical_and(self.flux_bounds[0] < 0.0, np.logical_not(self.reversible)) + if sum(mask) > 0: + self.reversible[mask] = True + upd_rev_rids = ', '.join([rid for rid in self.rids[mask]]) + print(f'reaction(s) {upd_rev_rids} set to reversible, due to configured flux bounds') + # reversible reactions get split into forward and reverse irreversible reactions self.rids_rev = self.rids[self.reversible] self.rid2idx_rev = {rid: idx + len(self.rids) for idx, rid in enumerate(self.rids_rev)} # using a sparse matrix speeds up significantly pickling (factor > 350 for S-matrix) @@ -56,13 +76,13 @@ class FbaModel: self.rids_rev = self.rids[self.reversible] self.rid2idx_rev = {rid: idx + len(self.rids) for idx, rid in enumerate(self.rids_rev)} - def update_model_reversible(self, reactions): - """Update reversible flag of reactions + def update_model_reversible(self, rev_pp_rids): + """Update reversible flag for reactions with negative flux in protected parts - :param reactions: reactions ids that have negative flux bounds - :type reactions: set of str + :param rev_pp_rids: reactions ids that have negative flux bounds + :type rev_pp_rids: set of str """ - upd_rids = set(reactions).difference(set(self.rids_rev)) + upd_rids = set(rev_pp_rids).difference(set(self.rids_rev)) for rid in upd_rids: self.reversible[self.rid2idx[rid]] = True if len(upd_rids) > 0: @@ -72,8 +92,6 @@ class FbaModel: def update_model_flux_bounds(self, flux_bounds): """Overwrite model flux bounds with protected parts flux bounds. - lp problem might need to be instantiated - :param flux_bounds: lower and upper flux bounds for selected reactions :type flux_bounds: dict: key: reaction id, value: list-like of two float values """