Skip to content
Snippets Groups Projects
Commit 1e44506e authored by Peter Schubert's avatar Peter Schubert
Browse files

Small code optimizations and annotation

parent 9e387b82
Branches
No related tags found
No related merge requests found
"""Implementation of ModelPruner class.
Prune SBML coded metabolic networks, i.e. reduce size/complexity by
removing reactions and metabolites while still protecing:
- specified phenotypes (e.g. growth on glucose within 90% of wild type)
- specified reactions
- specified metabolites (i.e. protecting reactions consuming/producing such
metabolites)
We basically remove redundant reactions (e.g. blocked reactions or reactions
in parallel pathways with corresponding metabolites)
- at each iteration step candidate reactions are identified with a flux range,
covering all protected functions, containing zero flux
One candidate is selected and removed, if it satisfied
- all protected functions
- does not block any of the protected functions
based on Erdrich, Steuer, Klamt. (2015) An algorithm for the reduction of
genome-scale metabolic network models to meaningful core models. BMC Syst Bio
and corresponding MATLAB code implemented in CellNetAnalyzer
http://www.mpi-magdeburg.mpg.de/projects/cna/cna.html
Peter Schubert, HHU Duesseldorf, September 2022
"""
import sys
import os
......@@ -82,17 +104,20 @@ class ModelPruner:
self.tolerance = 1e-12 # later accessible by set_params() ?
self.cpus = min(processes, os.cpu_count())
self.min_mp_total_flux = 1000
self.full_sbmnl_fname = sbml_fname
self.full_sbml_fname = sbml_fname
self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname)
self._snapshot_json = re.sub(r'.xml$', '_snapshot.json', sbml_fname)
# if resume option selected and snapshot model file exists, resume from there
if resume is True and os.path.exists(self._snapshot_sbml):
sbml_fname = self._snapshot_sbml
self.fba_model = FbaModel(sbml_fname)
self.pps = ProtectedParts(protected_parts)
self.protected_sids = self.pps.protected_sids
# if resume option selected and snapshot protected rids exists, resume from there
# note: during pruning, essential rids are added to protected rids
if resume is True and os.path.exists(self._snapshot_json):
with open(self._snapshot_json, 'r') as f:
snapshot_data = json.load(f)
......@@ -103,7 +128,7 @@ class ModelPruner:
self.protected_rids = self.pps.protected_rids
# self.protected_rids = self.get_total_protected_rids()
# overwrite the fba model bounds by general bound of protected parts (priot to LpProblem instantiation)
# overwrite fba model bounds by general bounds of protected parts (prior to LpProblem instantiation)
self.fba_model.update_model_flux_bounds(self.pps.overwrite_bounds)
self.fba_model.update_model_reversible(self.get_pp_reversible_reactions())
......@@ -121,8 +146,7 @@ class ModelPruner:
"""
# identify reactions that need protection due to protected metabolites
# over and above the protected rids provided in protected parts
sid_mask = np.array([True if sid in self.protected_sids else False
for sid in self.fba_model.sids])
sid_mask = [self.fba_model.sid2idx[sid] for sid in self.protected_sids]
psf_mask = self.fba_model.s_mat_coo.tocsr()[sid_mask].getnnz(axis=0)
protected_sid_rids = set(self.fba_model.rids[psf_mask])
protected_rids = self.pps.protected_rids.union(protected_sid_rids)
......@@ -137,7 +161,7 @@ class ModelPruner:
rev_pp_rids = set()
for rid, bounds in self.pps.overwrite_bounds.items():
for idx in [0, 1]:
if math.isfinite(bounds[idx]):
if math.isfinite(bounds[idx]) and bounds[idx] < 0.0:
rev_pp_rids.add(rid)
for pf in self.pps.protected_functions.values():
for rid, bounds in pf.overwrite_bounds.items():
......@@ -447,7 +471,7 @@ class ModelPruner:
"""
n_s, n_r = self.fba_model.s_mat_coo.shape
if pruned_sbml is None:
pruned_sbml = re.sub(r'.xml$', f'_pruned_{n_s}x{n_r}.xml', self.full_sbmnl_fname)
pruned_sbml = re.sub(r'.xml$', f'_pruned_{n_s}x{n_r}.xml', self.full_sbml_fname)
# overwrite model objective with objective of first protected function
first_pf = list(self.pps.protected_functions.values())[0]
......
"""Implementation of FbaModel class.
used to manage the FBA model used in ModelPruner
Model is created from SBML coded model file.
For performance reasons, the stoichiometric matrix is stored in
a sparce matrix data structure.
- speedup for data exchange with GlpkLinearProblem
- speedup for initiating multiprocessing (FVA)
reversible reactions are treated as two irreversible reactions
- this avoids high flux values during FBA
The FBA model gets reduced along the pruning by removing reactions
and blocked metabolites.
Peter Schubert, HHU Duesseldorf, September 2022
"""
import os
......@@ -36,10 +54,12 @@ class FbaModel:
self.flux_bounds = np.vstack((df_reactions['fbcLb'].to_numpy(),
df_reactions['fbcUb'].to_numpy()))
self.reversible = df_reactions['reversible'].to_numpy().copy()
for idx in range(len(self.reversible)):
if self.flux_bounds[0, idx] < 0 and np.logical_not(self.reversible[idx]):
self.reversible[idx] = True
print(f'reaction {idx} set to reversible')
mask = np.logical_and(self.flux_bounds[0] < 0.0, np.logical_not(self.reversible))
if sum(mask) > 0:
self.reversible[mask] = True
upd_rev_rids = ', '.join([rid for rid in self.rids[mask]])
print(f'reaction(s) {upd_rev_rids} set to reversible, due to configured flux bounds')
# reversible reactions get split into forward and reverse irreversible reactions
self.rids_rev = self.rids[self.reversible]
self.rid2idx_rev = {rid: idx + len(self.rids) for idx, rid in enumerate(self.rids_rev)}
# using a sparse matrix speeds up significantly pickling (factor > 350 for S-matrix)
......@@ -56,13 +76,13 @@ class FbaModel:
self.rids_rev = self.rids[self.reversible]
self.rid2idx_rev = {rid: idx + len(self.rids) for idx, rid in enumerate(self.rids_rev)}
def update_model_reversible(self, reactions):
"""Update reversible flag of reactions
def update_model_reversible(self, rev_pp_rids):
"""Update reversible flag for reactions with negative flux in protected parts
:param reactions: reactions ids that have negative flux bounds
:type reactions: set of str
:param rev_pp_rids: reactions ids that have negative flux bounds
:type rev_pp_rids: set of str
"""
upd_rids = set(reactions).difference(set(self.rids_rev))
upd_rids = set(rev_pp_rids).difference(set(self.rids_rev))
for rid in upd_rids:
self.reversible[self.rid2idx[rid]] = True
if len(upd_rids) > 0:
......@@ -72,8 +92,6 @@ class FbaModel:
def update_model_flux_bounds(self, flux_bounds):
"""Overwrite model flux bounds with protected parts flux bounds.
lp problem might need to be instantiated
:param flux_bounds: lower and upper flux bounds for selected reactions
:type flux_bounds: dict: key: reaction id, value: list-like of two float values
"""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment