Skip to content
Snippets Groups Projects
Commit 4c86e5d7 authored by Peter Schubert's avatar Peter Schubert
Browse files

Considering linear pathways during pruning

parent dd179e57
No related branches found
No related tags found
No related merge requests found
"""
Definition of version string.
"""
__version__ = "0.3.0"
__version__ = "0.3.1"
program_name = 'modelpruner'
......@@ -34,6 +34,8 @@ import json
from modelpruner.models.fba_model import FbaModel
from modelpruner.core.protected_parts import ProtectedParts
from modelpruner.graph.network_graph import NetworkGraph
from modelpruner.graph.linear_pathways import LinearPathways
global _fba_model
global _pfs
......@@ -90,7 +92,7 @@ def _worker(rids):
class ModelPruner:
def __init__(self, sbml_fname, protected_parts, resume=False, processes=100):
"""Init
"""Initialize ModelPruner
:param sbml_fname: pathname of original model (ending with '.xml')
:type sbml_fname: str
......@@ -105,6 +107,8 @@ class ModelPruner:
self.cpus = min(processes, os.cpu_count())
self.min_mp_total_flux = 1000
self.full_sbml_fname = sbml_fname
self.ng = None
self.lpwys = None
self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname)
self._snapshot_json = re.sub(r'.xml$', '_snapshot.json', sbml_fname)
......@@ -116,8 +120,8 @@ class ModelPruner:
self.pps = ProtectedParts(protected_parts)
self.protected_sids = self.pps.protected_sids
self.protected_rids = self.get_total_protected_rids()
self.protected_rids = self.pps.protected_rids
# self.protected_rids = self.get_total_protected_rids()
self.essential_rids = set()
# if resume option selected and snapshot of essential rids exists, use them
......@@ -138,10 +142,11 @@ class ModelPruner:
notes = self.pps.protected_functions[idx].notes
print(f' protected function {idx}, optimum: {val:.04f} ({notes})')
print('Check consistency of protected parts')
print('Check consistency of protected parts:')
blocked_p_rids = self.check_protected_parts()
self.active_protected_rids = self.protected_rids.difference(blocked_p_rids)
# TODO: can be removed once pruning validated
def get_total_protected_rids(self):
"""Determine protected reactions, incl. reactions connecting protected metabolites.
......@@ -225,7 +230,10 @@ class ModelPruner:
if len(pf_unprotected_rids) > 0:
print(' Reactions in protected functions that are not protected:', pf_unprotected_rids)
blocked_rids = self.reaction_types_fva(self.fba_model.rids)['blocked_rids']
self.set_linear_pathways()
compressed_rids = self.compress_pwy_rids(self.fba_model.rids)
reaction_types = self.reaction_types_fva(compressed_rids)
blocked_rids = self.uncompress_pwy_rids(reaction_types['blocked_rids'])
blocked_p_rids = self.protected_rids.intersection(blocked_rids)
blocked_pf_rids = pf_rids.intersection(blocked_rids)
if len(blocked_p_rids) > 0:
......@@ -237,29 +245,30 @@ class ModelPruner:
def identify_parallel_reactions(self):
"""Identify parallel reactions having same stoichiometry (considering reverse).
protected reaction are preserved.
reversible reactions have higher priority
From all reactions in a group of parallel reactions, try to preserve one reaction
in the model
protected reactions or reactions in protected linear pathways are preserved
in any case.
if there exists a reversible reaction, we try to preserve it
:return: reaction ids of parallel reactions
:rtype: set of str
"""
drop_rids = set()
for group_rids in self.fba_model.get_parallel_rids():
protected = set()
drop_parallel_rids = set()
for parallel_rids in self.fba_model.get_parallel_rids():
drop_candidate_rids = set(parallel_rids)
rid_rev = None
for rid in group_rids:
if rid in self.protected_rids:
protected.add(rid)
if rid in self.fba_model.rids_rev:
for rid in parallel_rids:
if (rid in self.protected_rids) or (self.lpwys.get_pathway_from_rid(rid)['protected'] is True):
drop_candidate_rids.remove(rid)
elif rid in self.fba_model.rids_rev:
rid_rev = rid
if len(protected) > 0:
drop_group_rids = set(group_rids) - protected
elif rid_rev is not None:
drop_group_rids = set(group_rids) - {rid_rev}
else:
drop_group_rids = set(group_rids[1:])
drop_rids |= drop_group_rids
return drop_rids
if len(drop_candidate_rids) > 1 and rid_rev is not None:
drop_candidate_rids.remove(rid_rev)
elif len(drop_candidate_rids) == len(parallel_rids):
drop_candidate_rids.pop()
drop_parallel_rids |= drop_candidate_rids
return drop_parallel_rids
def is_protected_rids_blocked(self, candidate_rid):
"""check if active protected reactions get blocked by removal of specified reaction.
......@@ -296,14 +305,13 @@ class ModelPruner:
:rtype: dict
"""
# print(time.strftime("%H:%M:%S", time.localtime()))
n_pf = len(self.pps.protected_functions)
flux_min_pfs = np.zeros((n_pf, len(free_rids)))
flux_max_pfs = np.zeros((n_pf, len(free_rids)))
res = {}
# setting up multiprocessing takes quite some resources
# there is a tradeoff wrt running on one processor
# there is a tradeoff wrt running on a single processor
processes = min(self.cpus, int(len(free_rids)*n_pf/self.min_mp_total_flux))
# processes = min(self.cpus, len(free_rids))
......@@ -327,7 +335,6 @@ class ModelPruner:
flux_max_pfs[idx] = res['max']
else:
print(f'FBA for condition {pf.obj_id} is not feasible')
# TODO: can this happen? how best to continue?
flux_min_pfs[idx] = np.nan
flux_max_pfs[idx] = np.nan
break
......@@ -387,12 +394,112 @@ class ModelPruner:
'candidate_rids_sorted': candidate_rids_sorted}
return reaction_types
def set_linear_pathways(self, protected_rids=None):
"""Retrieve and set linear pathways from fba model.
Construct from current fba model a network graph and linear pathways
Exclude specific metabolites from participating in linear pathways.
Such specific metabolites are substrates consumed/produced by
specific reactions (protected reactions)
Default in case no protected reactions are supplied is excluding
all metabolites used in the fba objective functions of the fba model
(e.g. the Biomass functions)
Note: This blocks constructing linear pathways connected through
the biomass functions
:param protected_rids: list of reaction ids from whith to retrieve substrates
:type protected_rids: None (= fba biomass reactions) or list/set of str, default: None
"""
ng = NetworkGraph(self.fba_model)
self.lpwys = LinearPathways(ng)
if protected_rids is None:
protected_rids = self.fba_model.biomass_rids
exclude_sids = ng.get_substrates(protected_rids)
self.lpwys.exclude_sids(exclude_sids)
self.lpwys.construct()
def compress_pwy_rids(self, uncompressed_rids):
"""Collaps pathway reactions ids.
From list of reactions drop correlated reactions due to
working in same linear pathway
:param uncompressed_rids: list of reaction ids in the model
:type uncompressed_rids: list or set of str
:return: reactions ids (considering only one per linear pathway)
:rtype: list of str
"""
retained_rids = []
consumed_pwy_idxs = set()
for rid in uncompressed_rids:
if rid not in self.lpwys.rids2pwy:
retained_rids.append(rid)
elif self.lpwys.rids2pwy[rid] not in consumed_pwy_idxs:
retained_rids.append(rid)
consumed_pwy_idxs.add(self.lpwys.rids2pwy[rid])
return retained_rids
def uncompress_pwy_rids(self, compressed_rids):
"""Uncompress reaction ids along linear pathways.
:param compressed_rids: list of reaction ids
:type compressed_rids: list or set of str
:return: reactions ids expanded across linear pathways
:rtype: list of str
"""
uncompressed_rids = []
for rid in compressed_rids:
if rid in self.lpwys.rids2pwy:
pwy_idx = self.lpwys.rids2pwy[rid]
uncompressed_rids.extend(self.lpwys.pwy2rids[pwy_idx])
else:
uncompressed_rids.append(rid)
return uncompressed_rids
def rids_sort_by_pwy(self, candidate_rids):
"""Sort reactions ids considering linear pathways.
In pruning, candidate reactions for deletion can be part of
linear pathways.
If candidate reactions is part of a protected pathway it will be considered essential
If candidate is part of an unprotected pathway, all reactions within same pathway
will be treated jointly
If candidates not part of pathways, will be at the end of sorted list
:param candidate_rids: list of reaction ids
:type candidate_rids: list or set of str
:return: reactions ids sorted considering pathway information
:rtype: list of str
"""
network_rids = []
protected_pwy_rids = []
unprotected_pwy_rids = []
unprotected_pwy_rids_pwylen = {}
for rid in candidate_rids:
if rid in self.lpwys.rids2pwy:
if self.lpwys.get_pathway_from_rid(rid)['protected'] is True:
protected_pwy_rids.append(rid)
else:
pwy_idx = self.lpwys.rids2pwy[rid]
pwy_len = len(self.lpwys.pwy2rids[pwy_idx])
unprotected_pwy_rids_pwylen[rid] = pwy_len
else:
network_rids.append(rid)
# rids part of unprotected pathways get sorted according to pathway length
pwy_lens_desc = sorted(list({pwy_len for pwy_len in unprotected_pwy_rids_pwylen.values()}), reverse=True)
for pwy_len in pwy_lens_desc:
for rid, p_len in unprotected_pwy_rids_pwylen.items():
if p_len == pwy_len:
unprotected_pwy_rids.append(rid)
return protected_pwy_rids + unprotected_pwy_rids + network_rids
def prune(self, snapshot_freq=0):
"""Prune the metabolic network
Once the Metabolic network has been loaded it can be pruned.
With snapshot_freq one can deterimine if (snapshot_freq > 0) and
how often intermediate results shall be store, from which one can resume in future
Once the metabolic network has been loaded it can be pruned.
With snapshot_freq one can determine if (snapshot_freq > 0) and
how often intermediate results shall be store, from which one can resume.
Step during Pruning
1. identification and removal of parallel reactions
......@@ -410,8 +517,9 @@ class ModelPruner:
print(f'Initial S-matrix: {self.fba_model.s_mat_coo.shape}; '
f' protected reactions: {len(self.protected_rids)}:', self.protected_rids)
# remove any parallel reactions from the model
drop_rids = self.identify_parallel_reactions().difference(self.protected_rids)
# remove parallel reactions (i.e. from reactions with same stoichiometry, keep only one)
self.set_linear_pathways()
drop_rids = self.identify_parallel_reactions()
if len(drop_rids) > 0:
self.fba_model.remove_reactions(drop_rids)
print(f'{len(drop_rids)} parallel reaction(s) dropped:', drop_rids)
......@@ -426,41 +534,53 @@ class ModelPruner:
# main iterration loop to retain and remove reactions from the fba model
retained_rids = self.protected_rids | self.essential_rids
while len(retained_rids) < self.fba_model.s_mat_coo.shape[1]:
self.set_linear_pathways()
free_rids = list(set(self.fba_model.rids).difference(retained_rids))
reaction_types = self.reaction_types_fva(free_rids)
free_compressed_rids = self.compress_pwy_rids(free_rids)
reaction_types = self.reaction_types_fva(free_compressed_rids)
print()
new_essential_rids = reaction_types['essential_rids']
blocked_rids = reaction_types['blocked_rids']
new_essential_rids = self.uncompress_pwy_rids(reaction_types['essential_rids'])
blocked_rids = self.uncompress_pwy_rids(reaction_types['blocked_rids'])
candidate_rids = reaction_types['candidate_rids_sorted']
if len(blocked_rids) > 0:
self.fba_model.remove_reactions(blocked_rids)
print(f"{len(blocked_rids)} blocked reaction(s) removed:", blocked_rids)
self.lpwys.set_protected_pathways(retained_rids, self.protected_sids)
# unprotected blocked reactions get removed from the model
unprotected_blocked_rids = []
for rid in blocked_rids:
if self.lpwys.get_pathway_from_rid(rid)['protected'] is False:
unprotected_blocked_rids.append(rid)
else:
new_essential_rids.append(rid)
if len(unprotected_blocked_rids) > 0:
self.fba_model.remove_reactions(unprotected_blocked_rids)
print(f"{len(unprotected_blocked_rids)} blocked reaction(s) removed:", unprotected_blocked_rids)
print(f"{len(candidate_rids)} candidates:", candidate_rids[:4], '...')
candidate_rids_sorted_by_pwy = self.rids_sort_by_pwy(candidate_rids)
print(f"{len(candidate_rids_sorted_by_pwy)} candidates:", candidate_rids_sorted_by_pwy[:4], '...')
# try to remove one reaction while still satisfying all protected functions
for candidate_rid in candidate_rids:
# print('Check feasibility of', candidate_rid)
for candidate_rid in candidate_rids_sorted_by_pwy:
# if candidate is in a protected linear pathway, it is considered essential
if self.lpwys.get_pathway_from_rid(candidate_rid)['protected'] is True:
new_essential_rids.extend(self.uncompress_pwy_rids([candidate_rid]))
continue
# if candidate not in a protected pathway, check if removal still satisfies protected functions
else:
feasible = self.fba_model.check_pfs_feasibility(self.pps.protected_functions,
candidate_rid=candidate_rid)
if feasible is False:
new_essential_rids.add(candidate_rid)
new_essential_rids.extend(self.uncompress_pwy_rids([candidate_rid]))
continue
else:
# check that active_protected_rids are not getting blocked
if len(self.active_protected_rids) > 0:
is_blocked = self.is_protected_rids_blocked(candidate_rid)
if is_blocked is True:
new_essential_rids.add(candidate_rid)
continue
self.fba_model.remove_reactions({candidate_rid})
print(f' {candidate_rid} removed from the model')
self.fba_model.remove_reactions(self.uncompress_pwy_rids([candidate_rid]))
print(f' {self.uncompress_pwy_rids([candidate_rid])} removed from the model')
break
if len(new_essential_rids) > 0:
self.essential_rids |= new_essential_rids
retained_rids |= new_essential_rids
self.essential_rids |= set(new_essential_rids)
retained_rids |= set(new_essential_rids)
print(f"{len(new_essential_rids)} reaction(s) added to essential reactions:",
new_essential_rids)
......
......@@ -224,7 +224,7 @@ class LinearPathway:
segments.reverse()
else:
self.inconsistant = True
print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}')
# print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}')
break
upstream_seg = segments[-1]
return segments
......@@ -269,7 +269,7 @@ class LinearPathway:
segments.reverse()
else:
self.inconsistant = True
print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}')
# print(f'inconsistent configuration at {pwy_seg.sid}, starting from {self.init_sid}')
break
downstream_seg = segments[0]
return segments
......
......@@ -21,25 +21,55 @@ class LinearPathways:
"""
self.ng = network_graph
self.c2_sids = {node.sid: None for node in self.ng.nodes.values() if node.is_c2_node()}
self.pwy_rids = {}
self.rids2pwy = {}
self.pwy2rids = {}
self.pathways = []
self.inconsistant_pathways = []
self.protected_pathways = set()
def protect_reactions(self, protected_rids):
"""Protect metabolites connected to selected reactions.
def exclude_sids(self, exclude_sids):
"""Exclude selected metabolites from linear pathways.
:param protected_rids: list of reaction ids to be protected
:type protected_rids: list of str
:param exclude_sids: metabolite ids to exclude from c2 sids
:type exclude_sids: list or set of str
"""
protected_sids = set()
for rid in protected_rids:
if rid in self.ng.edges:
protected_sids |= set(self.ng.edges[rid].reactants.keys())
protected_sids |= set(self.ng.edges[rid].products.keys())
for sid in protected_sids:
for sid in exclude_sids:
if sid in self.c2_sids:
del self.c2_sids[sid]
def set_protected_pathways(self, p_rids, p_sids):
"""Identify protected pathways.
Protected pathways are linear pathways that contain
protected reactions of protected metabolites
:param p_rids: reaction ids of protected reactions
:type p_rids: list of str
:param p_sids: metabolite ids of protected metabolites
:type p_sids: list of str
"""
for rid in p_rids:
if rid in self.rids2pwy:
self.protected_pathways.add(self.rids2pwy[rid])
for sid in p_sids:
if sid in self.c2_sids:
self.protected_pathways.add(self.c2_sids[sid])
return self.protected_pathways
def get_pathway_from_rid(self, rid):
pwy_idx = self.rids2pwy.get(rid, None)
protected = False
if (pwy_idx is not None) and (pwy_idx in self.protected_pathways):
protected = True
return {'rid': rid, 'pwy_idx': pwy_idx, 'protected': protected}
def get_pathway_from_sid(self, sid):
pwy_idx = self.c2_sids.get(sid, None)
protected = False
if (pwy_idx is not None) and (pwy_idx in self.protected_pathways):
protected = True
return {'sid': sid, 'pwy_idx': pwy_idx, 'protected': protected}
def construct(self):
"""Construct linear pathways for the network.
......@@ -48,7 +78,7 @@ class LinearPathways:
c2 metabolite we construct a complete linear pathways, including branches that
consist of c2 metabolites.
We update references in c2_sids and pwy_rids to indicate in which pathway
We update references in c2_sids and rids2pwy to indicate in which pathway
respective metabolites and reactions are contained.
:return: number of linear pathways
......@@ -64,8 +94,15 @@ class LinearPathways:
for subs_sid in lin_pwy.c2_sids_pwy:
self.c2_sids[subs_sid] = pwy_idx
for rid in lin_pwy.rids_scale:
self.pwy_rids[rid] = pwy_idx
self.rids2pwy[rid] = pwy_idx
pwy_idx += 1
else:
self.inconsistant_pathways.append(lin_pwy)
pwy_idx += 1
for rid, pwy_idx in self.rids2pwy.items():
if pwy_idx not in self.pwy2rids:
self.pwy2rids[pwy_idx] = [rid]
else:
self.pwy2rids[pwy_idx].append(rid)
return pwy_idx
......@@ -17,6 +17,21 @@ class NetworkGraph:
for (rid, rev) in zip(fba_model.rids, fba_model.reversible)}
self.connect_nodes(fba_model)
def get_substrates(self, rids):
"""Retrieve all substrates of selected reactions.
:param rids: reaction ids for which to retrieve reactants and products
:type rids: list of str
:return: metabolite ids of substrates
:rtype: set of str
"""
substrate_sids = set()
for rid in rids:
if rid in self.edges:
substrate_sids |= set(self.edges[rid].reactants.keys())
substrate_sids |= set(self.edges[rid].products.keys())
return substrate_sids
def connect_nodes(self, fba_model):
for (row, col, stoic) in zip(fba_model.s_mat_coo.row,
fba_model.s_mat_coo.col,
......
......@@ -72,6 +72,11 @@ class FbaModel:
self.lp = None
self.scaling = 'AUTO' # check also '2N'
self.biomass_rids = set()
for idx, row in model_dict['fbcObjectives'].iterrows():
for reac_ref in sbmlxdf.misc.record_generator(row['fluxObjectives']):
self.biomass_rids.add(sbmlxdf.misc.extract_params(reac_ref)['reac'])
def update_reversible_rids(self):
self.rids_rev = self.rids[self.reversible]
self.rid2idx_rev = {rid: idx + len(self.rids) for idx, rid in enumerate(self.rids_rev)}
......@@ -343,7 +348,7 @@ class FbaModel:
"""remove reactions and blocked metabolites from model and lp problem
:param drop_rids: reaction ids to be removed from the model
:type drop_rids: set of str
:type drop_rids: list or set of str
"""
if self.lp is None:
self.lp = self.create_core_fba_lp()
......@@ -403,28 +408,6 @@ class FbaModel:
self.remove_lp_objective(pf.objective)
return res
def check_pf_feasibility(self, pf, zero_flux_rid=None):
"""check that current model satisfies the protected functions
:param pf: protected function (objective)
:type pf: class ProtectedFunction
:param zero_flux_rid: reaction id of candiated checked for removal
:type zero_flux_rid: str, optional (default: None)
:return: status if protected function satisfied
:rtype: bool
"""
feasible = True
res = self.fba_pf_optimize(pf, zero_flux_rid)
if res['success'] is False:
feasible = False
elif pf.obj_dir == 'maximize':
if res['fun'] < pf.target_lb:
feasible = False
else:
if res['fun'] > pf.target_ub:
feasible = False
return feasible
def check_pfs_feasibility(self, pfs, candidate_rid=None):
"""check that current model satisfies all protected functions
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment