Commit b58855bb authored by Peter Schubert

Multiprocessing implemented for FVA (validated on macOS)

parent 9b98c8c4
@@ -5,7 +5,6 @@ import os
 import re
 import time
 import math
-import copy
 import numpy as np
 import multiprocessing as mp
 import json
@@ -14,30 +13,72 @@ from modelpruner.models.fba_model import FbaModel
 from modelpruner.core.protected_parts import ProtectedParts
 
 global _fba_model
-global _pf
+global _pfs
 
 
-def init_worker(fba_model, pf):
+def _init_worker(fba_model, pfs):
+    """Multiprocessing initializer for worker processes.
+
+    Configures global parameters for the worker process.
+
+    :param fba_model: FBA model on which to run FVA
+    :type fba_model: class FbaModel
+    :param pfs: protected functions
+    :type pfs: dict, key: function id, value: class ProtectedFunction
+    """
     global _fba_model
-    global _pf
+    global _pfs
     _fba_model = fba_model.copy()
-    _pf = pf
+    _pfs = pfs
 
 
-def worker(rid):
+def _worker(rids):
+    """Multiprocessing worker that runs FVA for selected reactions over all conditions.
+
+    Testing showed that best performance is achieved when a worker runs FVA
+    for all protected functions on a larger batch of reactions.
+
+    :param rids: reactions for FVA
+    :type rids: list-like of str
+    """
     global _fba_model
-    global _pf
-    return _fba_model.fva_pf_flux_ranges(_pf, [rid])
+    global _pfs
+    flux_min_pfs = np.zeros((len(_pfs), len(rids)))
+    flux_max_pfs = np.zeros((len(_pfs), len(rids)))
+    for idx, pf in enumerate(_pfs.values()):
+        res = _fba_model.fva_pf_flux_ranges(pf, rids)
+        if res['success'] is True:
+            flux_min_pfs[idx] = res['min']
+            flux_max_pfs[idx] = res['max']
+        else:
+            print(f'FBA for condition {pf.obj_id} is not feasible')
+            flux_min_pfs[idx] = np.nan
+            flux_max_pfs[idx] = np.nan
+            break
+    # delete linear problem
+    # _fba_model.delete_fba_lp()
+    res = {'success': True, 'rids': rids, 'min_pfs': flux_min_pfs, 'max_pfs': flux_max_pfs}
+    return res
 class ModelPruner:
 
     def __init__(self, sbml_fname, protected_parts, resume=False):
+        """Init
+
+        :param sbml_fname: pathname of original model (ending with '.xml')
+        :type sbml_fname: str
+        :param protected_parts: pathname of protected parts file or dict with protected parts information
+        :type protected_parts: str or dict
+        :param resume: indicator whether to resume pruning from a stored snapshot
+        :type resume: bool, optional (default: False)
+        """
         self.tolerance = 1e-7  # later accessible by set_params() ?
-        self.cpus = int(os.cpu_count() * 0.8)
+        self.cpus = int(os.cpu_count() - 0)
+        self.min_mp_total_flux = 1000
 
         self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname)
         self._snapshot_json = re.sub(r'.xml$', '_snapshot.json', sbml_fname)
@@ -52,7 +93,7 @@ class ModelPruner:
             with open(self._snapshot_json, 'r') as f:
                 snapshot_data = json.load(f)
             self.protected_rids = set(snapshot_data['protected_rids'])
-            print(f'snapshot restored at {len(self.fba_model.rids):4d} remaining reactions '
+            print(f'Snapshot restored at {len(self.fba_model.rids):4d} remaining reactions '
                   f'from: {self._snapshot_json}')
         else:
             self.protected_rids = self.nrp.protected_rids
@@ -62,19 +103,24 @@ class ModelPruner:
         self.fba_model.update_model_flux_bounds(self.nrp.overwrite_bounds)
         self.fba_model.update_model_reversible(self.get_pp_reversible_reactions())
 
-        print('determine optimal values for protected functions')
+        print('Determine optimal values for protected functions')
         self.set_function_optimum()
         print('Check consistency of protected parts')
         self.check_protected_parts()
 
     def get_total_protected_rids(self):
+        """Determine protected reactions, incl. those leading to protected metabolites.
+
+        :return: total protected reaction ids
+        :rtype: set of str
+        """
         # identify reactions that need protection due to protected metabolites
         # over and above the protected rids provided in protected parts
         sid_mask = np.array([True if sid in self.protected_sids else False
                              for sid in self.fba_model.sids])
-        psf_mask = np.any(self.fba_model.s_mat[sid_mask] != 0, axis=0)
+        psf_mask = self.fba_model.s_mat_coo.tocsr()[sid_mask].getnnz(axis=0) > 0
         protected_sid_rids = set(self.fba_model.rids[psf_mask])
         protected_rids = self.nrp.protected_rids.union(protected_sid_rids)
         return protected_rids
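For reference, `getnnz(axis=0)` on the row-sliced sparse matrix counts nonzeros per column; comparing against zero turns the counts into a boolean mask of reactions that touch any protected metabolite. A toy sketch with made-up stoichiometry:

    import numpy as np
    from scipy import sparse

    # toy S-matrix: 3 metabolites x 4 reactions
    s_mat_coo = sparse.coo_matrix(np.array([[-1,  1,  0,  0],
                                            [ 0, -1,  1,  0],
                                            [ 0,  0, -1,  1]]))
    rids = np.array(['R1', 'R2', 'R3', 'R4'])
    sid_mask = np.array([False, True, False])   # protect the second metabolite only

    # columns with any nonzero entry in the protected rows
    psf_mask = s_mat_coo.tocsr()[sid_mask].getnnz(axis=0) > 0
    print(rids[psf_mask])                        # -> ['R2' 'R3']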
@@ -109,7 +155,10 @@ class ModelPruner:
             pf.set_target_ub(pf.target_frac * pf.optimal)
 
     def check_protected_parts(self):
+        """Report on consistency issues with respect to protected parts.
+
+        E.g. report on reactions and metabolites that do not exist in the model.
+        """
         extra_p_rids = self.protected_rids.difference(set(self.fba_model.rids))
         extra_p_sids = self.protected_sids.difference(set(self.fba_model.sids))
         pf_rids = set()
@@ -117,27 +166,27 @@ class ModelPruner:
             pf_rids |= {rid for rid in pf.objective}
             pf_rids |= {rid for rid in pf.overwrite_bounds}
             if pf.fba_success is False:
-                print(f'protected function {pf.obj_id} has unsuccessful FBA '
+                print(f'Protected function {pf.obj_id} has unsuccessful FBA '
                       f' with optimal value {pf.optimal}')
         extra_pf_rids = pf_rids.difference(set(self.fba_model.rids))
         pf_unprotected_rids = pf_rids.difference(set(self.protected_rids))
 
         if len(extra_p_rids) > 0:
-            print('protected reactions that do not exist in model:', extra_p_rids)
+            print('Protected reactions that do not exist in model:', extra_p_rids)
         if len(extra_p_sids) > 0:
-            print('protected metabolites that do not exist in model:', extra_p_sids)
+            print('Protected metabolites that do not exist in model:', extra_p_sids)
         if len(extra_pf_rids) > 0:
-            print('reactions in protected functions that do not exist in model:', extra_pf_rids)
+            print('Reactions in protected functions that do not exist in model:', extra_pf_rids)
         if len(pf_unprotected_rids) > 0:
-            print('reactions in protected functions that are not protected:', pf_unprotected_rids)
+            print('Reactions in protected functions that are not protected:', pf_unprotected_rids)
 
         blocked_rids = self.reaction_types_fva(self.fba_model.rids)['blocked_rids']
         blocked_p_rids = self.nrp.protected_rids.intersection(blocked_rids)
         blocked_pf_rids = pf_rids.intersection(blocked_rids)
         if len(blocked_p_rids) > 0:
-            print('protected reactions that are blocked (no flux):', blocked_p_rids)
+            print('Protected reactions that are blocked (no flux):', blocked_p_rids)
         if len(blocked_pf_rids) > 0:
-            print('reactions in protected functions that are blocked (no flux):', blocked_pf_rids)
+            print('Reactions in protected functions that are blocked (no flux):', blocked_pf_rids)
 
     def identify_parallel_reactions(self):
         """Identify parallel reactions having same stoichiometry (considering reverse).
@@ -183,17 +232,24 @@ class ModelPruner:
         flux_max_pfs = np.zeros((n_pf, len(free_rids)))
         res = {}
 
-        processes = min(self.cpus, len(free_rids))
-        for idx, pf in enumerate(self.nrp.protected_functions.values()):
-            if processes > 100:
-                frid2idx = {rid: idx for idx, rid in enumerate(free_rids)}
-                # place to split fva for free_rids over several
-                with mp.Pool(processes, initializer=init_worker, initargs=(self.fba_model, pf)) as pool:
-                    for res in pool.imap_unordered(worker, free_rids):
-                        rid = res['rids'][0]
-                        flux_min_pfs[idx, frid2idx[rid]] = res['min'][0]
-                        flux_max_pfs[idx, frid2idx[rid]] = res['max'][0]
-            else:
+        # setting up multiprocessing takes quite some resources;
+        # there is a trade-off vs. running on a single processor
+        processes = min(self.cpus, int(len(free_rids) * n_pf / self.min_mp_total_flux))
+        # processes = min(self.cpus, len(free_rids))
+        if processes > 1:
+            print(f'Multiprocessing with {processes} processes')
+            frid2idx = {rid: idx for idx, rid in enumerate(free_rids)}
+            # delete lp problem prior to pool creation (so FbaModel can be pickled)
+            self.fba_model.delete_fba_lp()
+            with mp.Pool(processes, initializer=_init_worker,
+                         initargs=(self.fba_model, self.nrp.protected_functions)) as pool:
+                for res in pool.imap_unordered(_worker, np.array_split(free_rids, processes)):
+                    idxs = [frid2idx[rid] for rid in res['rids']]
+                    flux_min_pfs[:, idxs] = res['min_pfs']
+                    flux_max_pfs[:, idxs] = res['max_pfs']
+        else:
+            for idx, pf in enumerate(self.nrp.protected_functions.values()):
                 # print(f'\nCheck FVA for pf {pf.obj_id} with {len(free_rids)} free variables')
                 res = self.fba_model.fva_pf_flux_ranges(pf, free_rids)
                 if res['success'] is True:
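With the default `min_mp_total_flux = 1000`, the heuristic keeps the pool small when there is little work: 600 free reactions and 2 protected functions give `int(600 * 2 / 1000) = 1`, so FVA stays single-process, while 8000 free reactions and 2 protected functions give `int(8000 * 2 / 1000) = 16` processes, capped by `self.cpus`.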
@@ -262,16 +318,27 @@ class ModelPruner:
         return reaction_types
 
-    def init_worker(self):
-        global _fba_model
-        _fba_model = copy.deepcopy(self.fba_model)
-
-    @staticmethod
-    def fva_single_rids(rid):
-        return _fba_model.fva_single_rid(rid)
-
-    def prune(self, snapshot_freq=None):
-        print(f'initial S-matrix: {self.fba_model.s_mat.shape}; '
+    def prune(self, snapshot_freq=0):
+        """Prune the metabolic network.
+
+        Once the metabolic network has been loaded, it can be pruned.
+        snapshot_freq determines whether (snapshot_freq > 0) and how often
+        intermediate results are stored, from which a future run can resume.
+
+        Steps during pruning:
+        1. identification and removal of parallel reactions
+        2. check feasibility of the network wrt all protected functions
+        3. in a loop, until all free reactions have been consumed:
+        3.1 identify types of remaining free reactions using FVA across all protected functions
+        3.2 blocked reactions (zero flux across all functions) get removed
+        3.3 essential reactions (strictly positive/negative flux across conditions) get protected
+        3.4 a single candidate reaction (supporting zero flux) gets identified and removed
+
+        :param snapshot_freq: frequency to store intermediate results (every x reactions)
+        :type snapshot_freq: int, optional (default: 0, no snapshots)
+        """
+        print(f'Initial S-matrix: {self.fba_model.s_mat_coo.shape}; '
               f' protected reactions: {len(self.protected_rids)}:', self.protected_rids)
 
         drop_rids = self.identify_parallel_reactions().difference(self.protected_rids)
         if len(drop_rids) > 0:
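A minimal usage sketch of the pruning workflow (file names and the module path are assumptions; protected parts can also be passed as a dict, see `__init__`):

    from modelpruner.core.model_pruner import ModelPruner  # assumed module path

    pruner = ModelPruner('model.xml', 'model_protected_parts.json')
    pruner.prune(snapshot_freq=50)        # store a snapshot every 50 removed reactions
    pruner.print_status()
    pruner.export_pruned_model('model_pruned.xml')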
@@ -279,13 +346,13 @@ class ModelPruner:
             print(f'{len(drop_rids)} parallel reaction(s) dropped:', drop_rids)
 
         feasible = np.array([self.fba_model.check_pf_feasibility(pf) for pf in self.nrp.protected_functions.values()])
-        print('protected functions feasibility:', np.all(feasible))
+        print('Protected functions feasibility:', np.all(feasible))
 
         next_snapshot = 0
-        if type(snapshot_freq) is int:
+        if snapshot_freq > 0:
             next_snapshot = max(0, len(self.fba_model.rids) - snapshot_freq)
 
-        while len(self.protected_rids) < self.fba_model.s_mat.shape[1]:
+        while len(self.protected_rids) < self.fba_model.s_mat_coo.shape[1]:
             free_rids = list(set(self.fba_model.rids).difference(self.protected_rids))
             reaction_types = self.reaction_types_fva(free_rids)
             print()
@@ -299,11 +366,11 @@ class ModelPruner:
             # try to remove one reaction while still satisfying all protected functions
             for rid in candidate_rids:
-                # print('check feasibility of', rid)
+                # print('Check feasibility of', rid)
                 feasible = True
                 for pf in self.nrp.protected_functions.values():
                     feasible = self.fba_model.check_pf_feasibility(pf, zero_flux_rid=rid)
-                    # print(f'feasibility of {rid} is {feasible}')
+                    # print(f'Feasibility of {rid} is {feasible}')
                     if feasible is False:
                         essential_rids.add(rid)
                         break
@@ -317,7 +384,7 @@ class ModelPruner:
                 print(f"{len(essential_rids)} reaction(s) added to protected reactions:", essential_rids)
 
             if feasible is False:
-                print('no more reactions to remove')
+                print('No more reactions to remove')
 
             # store intermediate results (snapshots)
             if len(self.fba_model.rids) < next_snapshot:
@@ -328,7 +395,7 @@ class ModelPruner:
                 next_snapshot = max(0, len(self.fba_model.rids) - snapshot_freq)
                 print('snapshot data saved.')
 
-        print(f'S-matrix: {self.fba_model.s_mat.shape}; '
+        print(f'S-matrix: {self.fba_model.s_mat_coo.shape}; '
               f'protected reactions: {len(self.protected_rids)}')
 
         # check if blocked reactions exist in the finally pruned model
@@ -336,10 +403,12 @@ class ModelPruner:
         # if len(blocked_rids) > 0:
         #     print(f'{len(blocked_rids)} blocked reaction(s) in pruned model', blocked_rids)
 
-        # remove any snapshot files
-        # for fname in [self._snapshot_sbml, self._snapshot_json]:
-        #     if os.path.exists(fname):
-        #         os.remove(fname)
+    def remove_snapshot_files(self):
+        """Remove any snapshot files that still exist."""
+        for pname in [self._snapshot_sbml, self._snapshot_json]:
+            if os.path.exists(pname):
+                os.remove(pname)
+                print(f'{pname} removed')
 
     def get_reduction_status(self):
         """ Return status in model reduction process
@@ -355,6 +424,7 @@ class ModelPruner:
         return status
 
     def print_status(self):
+        """Print status of pruning process"""
        status = self.get_reduction_status()
        print("reactions removed/remaining/protected: "
@@ -362,11 +432,21 @@ class ModelPruner:
               f"/{len(self.protected_rids):4d}; dof: {status['dof']:3d}")
 
     def export_pruned_model(self, pruned_sbml):
+        """Export the pruned network (or snapshot) to SBML.
+
+        Component information of the initial network will be carried over,
+        e.g. annotation data, groups, gene products.
+
+        :param pruned_sbml: pathname of pruned network, ending with '.xml'
+        :type pruned_sbml: str
+        """
         success = self.fba_model.export_pruned_model(pruned_sbml)
         if success is True:
-            print('pruned model exported to', pruned_sbml)
+            print('Pruned model exported to', pruned_sbml)
+            # self.remove_snapshot_files()
         else:
-            print('pruned model could not be exported')
+            print('Pruned model could not be exported')
 
     def __del__(self):
+        """Delete corresponding LpProblem"""
         del self.fba_model
modelpruner/models/fba_model.py

@@ -5,6 +5,7 @@ import time
 import re
 import math
 import numpy as np
+from scipy import sparse
 import sbmlxdf
 
 from modelpruner.problems.lp_problem import LpProblem
@@ -41,12 +42,13 @@ class FbaModel:
                 print(f'reaction {idx} set to reversible')
         self.rids_rev = self.rids[self.reversible]
         self.rid2idx_rev = {rid: idx + len(self.rids) for idx, rid in enumerate(self.rids_rev)}
-        self.s_mat = sbml_model.get_s_matrix().to_numpy()
+        # using a sparse matrix significantly speeds up pickling (factor > 350 for the S-matrix);
+        # pickling is required to support multiprocessing;
+        # data exchange from sbmlxdf.Model and towards LpProblem improves as well
+        self.s_mat_coo = sparse.coo_matrix(sbml_model.get_s_matrix(sparse=True))
 
-        # TODO: create LP Problem only once (but what is impact on deepcopy()
-        #  function to assign new LP problem
-        # lp problem will only be created once required. This gives chances to update e.g. flux bounds
+        # lp problem will only be created once required;
+        # this gives a chance to update e.g. flux bounds and supports pickling (multiprocessing)
         self.lp = None
 
     def update_reversible_rids(self):
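The pickling claim is easy to sanity-check; a toy sketch (made-up dimensions, the exact factor depends on the sparsity of the S-matrix):

    import pickle
    import numpy as np
    from scipy import sparse

    # toy stoichiometric matrix: mostly zeros, as in genome-scale models
    rng = np.random.default_rng(0)
    dense = np.zeros((2000, 3000))
    rows = rng.integers(0, 2000, size=10000)
    cols = rng.integers(0, 3000, size=10000)
    dense[rows, cols] = 1.0

    coo = sparse.coo_matrix(dense)
    # the dense pickle is orders of magnitude larger than the COO pickle
    print(len(pickle.dumps(dense)) / len(pickle.dumps(coo)))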
@@ -87,11 +89,13 @@ class FbaModel:
         :return: lists of reaction ids that are parallel
         :rtype: list with lists of str
         """
-        fwd_rev_s_mat = np.hstack((self.s_mat, -self.s_mat[:, self.reversible]))
+        fwd_rev_s_mat_coo = sparse.hstack([self.s_mat_coo,
+                                           -self.s_mat_coo.tocsc()[:, self.reversible]])
         fwd_rev_rids = np.hstack((self.rids, self.rids_rev))
 
         # identify groups of parallel reactions
-        sorted_idx = np.lexsort(fwd_rev_s_mat)
+        sorted_idx = np.lexsort(fwd_rev_s_mat_coo.todense())[0]
 
         groups = []
         consumed_idxs = set()
         for pos in range(len(sorted_idx)):
@@ -100,7 +104,7 @@ class FbaModel:
                 group = [idx]
                 for adjacent_pos in range(pos + 1, len(sorted_idx)):
                     candiate_idx = sorted_idx[adjacent_pos]
-                    if np.all(fwd_rev_s_mat[:, idx] == fwd_rev_s_mat[:, candiate_idx]):
+                    if (fwd_rev_s_mat_coo.getcol(idx) != fwd_rev_s_mat_coo.getcol(candiate_idx)).getnnz() == 0:
                         consumed_idxs.add(candiate_idx)
                         group.append(candiate_idx)
                     else:
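`np.lexsort` places identical stoichiometry columns next to each other, so equality only has to be checked between neighbors instead of across all pairs. A toy illustration:

    import numpy as np

    # columns 0 and 2 are identical ("parallel reactions")
    s = np.array([[1, 0, 1],
                  [2, 5, 2]])
    sorted_idx = np.lexsort(s)   # rows serve as sort keys, the last row is primary
    print(sorted_idx)            # -> [0 2 1]: identical columns 0 and 2 are adjacent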
@@ -131,14 +135,24 @@ class FbaModel:
         :returns: status information in a dictionary
         :rtype: dict
         """
-        status = {'dof': self.s_mat.shape[1] - np.linalg.matrix_rank(self.s_mat),
+        status = {'dof': self.s_mat_coo.shape[1] - np.linalg.matrix_rank(self.s_mat_coo.todense()),
                   'n_rids': len(self.rids),
                   'n_sids': len(self.sids)}
         status.update(self.full_model_shape)
         return status
 
     def export_pruned_model(self, pruned_sbml):
+        """Export the pruned network (or snapshot) to SBML.
+
+        Component information of the initial network will be carried over,
+        e.g. annotation data, groups, gene products.
+        History information, if available, will be updated.
+        The model will also be validated with respect to SBML compliance.
+
+        :param pruned_sbml: pathname of pruned network, ending with '.xml'
+        :type pruned_sbml: str
+        """
         sbml_model = sbmlxdf.Model(self.sbml_fname)
         model_dict = sbml_model.to_df()
         pruned_mdict = model_dict.copy()
@@ -147,8 +161,11 @@ class FbaModel:
         # update model attributes (ids, names and modification date)
         pruned_mdict['modelAttrs']['id'] += '_pruned'
+        if 'name' in pruned_mdict['modelAttrs']:
             pruned_mdict['modelAttrs']['name'] += '_pruned'
+        if 'metaid' in pruned_mdict['modelAttrs']:
             pruned_mdict['modelAttrs']['metaid'] += '_pruned'
+        if 'modifiedHistory' in pruned_mdict['modelAttrs']:
             pruned_mdict['modelAttrs']['modifiedHistory'] += '; localtime'
 
         # clean groups table
@@ -203,13 +220,23 @@ class FbaModel:
         :return: LpProblem configured for FBA
         :rtype: LpProblem
         """
-        fwd_rev_s_mat = np.hstack((self.s_mat[:], -self.s_mat[:, self.reversible]))
+        fwd_rev_s_mat_coo = sparse.hstack([self.s_mat_coo,
+                                           -self.s_mat_coo.tocsc()[:, self.reversible]])
         fwd_rev_col_bnds = np.hstack((np.fmax(self.flux_bounds[:], 0),
                                       np.fmax(np.flip(-self.flux_bounds[:, self.reversible], axis=0), 0)))
-        row_bnds = np.zeros((fwd_rev_s_mat.shape[0], 2))
-        lp = LpProblem(fwd_rev_s_mat, row_bnds, fwd_rev_col_bnds)
+        row_bnds = np.zeros((fwd_rev_s_mat_coo.shape[0], 2))
+        lp = LpProblem(fwd_rev_s_mat_coo, row_bnds, fwd_rev_col_bnds)
         return lp
 
+    def delete_fba_lp(self):
+        """Delete the corresponding linear problem.
+
+        E.g. required for multiprocessing, so FbaModel can be pickled.
+        """
+        if self.lp is not None:
+            del self.lp
+            self.lp = None
+
     def set_lp_objective(self, objective, direction='maximize'):
         """Set objective coefficients and direction in linear problem
@@ -241,7 +268,7 @@ class FbaModel:
         """
         if self.lp is None:
             self.lp = self.create_core_fba_lp()
-            print('Core LP Problem created')
+            # print('Core LP Problem created')
 
         for rid, bound_upd in flux_bounds.items():
             if rid in self.rids:
                 temp_bounds = self.flux_bounds[:, self.rid2idx[rid]].copy()
@@ -291,6 +318,9 @@ class FbaModel:
         :param drop_rids: reaction ids to be removed from the model
         :type drop_rids: set of str
         """
+        if self.lp is None:
+            self.lp = self.create_core_fba_lp()
+            # print('Core LP Problem created')
         assert self.lp is not None
 
         # remove variables (columns) from linear problem
@@ -304,16 +334,16 @@ class FbaModel:
         self.reversible = self.reversible[keep_rids_mask]
         self.update_reversible_rids()
         self.flux_bounds = self.flux_bounds[:, keep_rids_mask]
-        self.s_mat = self.s_mat[:, keep_rids_mask]
+        self.s_mat_coo = self.s_mat_coo.tocsc()[:, keep_rids_mask].tocoo()
 
         # drop blocked metabolites due to deleted reactions
-        keep_sids_mask = np.any(self.s_mat != 0, axis=1)
+        keep_sids_mask = (self.s_mat_coo.getnnz(axis=1) > 0)
         row_idxs0 = [idx for idx, val in enumerate(keep_sids_mask) if val == np.bool_(False)]
         if len(row_idxs0) > 0:
             self.lp.remove_rows(row_idxs0)
         self.sids = self.sids[keep_sids_mask]
         self.sid2idx = {sid: idx for idx, sid in enumerate(self.sids)}
-        self.s_mat = self.s_mat[keep_sids_mask, :]
+        self.s_mat_coo = self.s_mat_coo.tocsr()[keep_sids_mask, :].tocoo()
 
     def fba_pf_optimize(self, pf, zero_flux_rid=None):
         """FBA optimization of current problem
@@ -329,7 +359,7 @@ class FbaModel:
         """
         if self.lp is None:
             self.lp = self.create_core_fba_lp()
-            print('Core LP Problem created')
+            # print('Core LP Problem created')
         assert self.lp is not None
 
         if zero_flux_rid is not None:
@@ -430,9 +460,6 @@ class FbaModel:
             result.__dict__['lp'] = None
         return result
 
-    def __reduce__(self):
-        return self.__class__, (self.sbml_fname, )
-
     def __del__(self):
         print('deletion of fba_model')
         del self.lp
modelpruner/problems/lp_problem.py

@@ -6,8 +6,6 @@ Peter Schubert, HHU Duesseldorf, June 2022
 """
 import numpy as np
-import scipy
-import scipy.sparse
 import swiglpk as glpk
@@ -61,13 +59,13 @@ scaling_options = {'GM': glpk.GLP_SF_GM,
 class LpProblem:
 
-    def __init__(self, constr_coefs, row_bnds, col_bnds):
+    def __init__(self, constr_coefs_coo, row_bnds, col_bnds):
         """create core linear problem (constraints, variable and row bounds)
 
         Problem needs to be properly deleted using del(LpProblem)
         Note: glpk related indices start at 1 (not zero)
 
-        :param constr_coefs: matrix with constraint coefficients
-        :type constr_coefs: 2D nparray with shape(nrows, ncols)
+        :param constr_coefs_coo: matrix with constraint coefficients
+        :type constr_coefs_coo: scipy.sparse.coo_matrix with shape(nrows, ncols)
         :param row_bnds: lower and upper bounds of auxiliary (row) variables
         :type row_bnds: 2D ndarray of shape(nrows, 2) (1st col: lower bounds, 2nd col: upper bounds)
         :param col_bnds: lower and upper bounds of structural (col) variables
@@ -78,7 +76,7 @@ class LpProblem:
         glp_presolve = glpk.GLP_OFF  # glpk.GLP_OFF (default) or glpk.GLP_ON
         self.res = {}
 
-        self.nrows, self.ncols = constr_coefs.shape
+        self.nrows, self.ncols = constr_coefs_coo.shape
         assert col_bnds.shape == (2, self.ncols)
         assert row_bnds.shape == (self.nrows, 2)
@@ -95,15 +93,14 @@ class LpProblem:
         for i, lb_ub in enumerate(row_bnds):
             var_type, lb, ub = self._get_variable_type(lb_ub)
             glpk.glp_set_row_bnds(self.lp, 1 + i, var_type, lb, ub)
-        constr_coefs_sparse = scipy.sparse.coo_matrix(constr_coefs)
-        n_coefs = len(constr_coefs_sparse.row)
+        n_coefs = len(constr_coefs_coo.row)
         ia = glpk.intArray(1 + n_coefs)
         ja = glpk.intArray(1 + n_coefs)
         ar = glpk.doubleArray(1 + n_coefs)
         for i in range(n_coefs):
-            ia[1 + i] = int(1 + constr_coefs_sparse.row[i])
-            ja[1 + i] = int(1 + constr_coefs_sparse.col[i])
-            ar[1 + i] = constr_coefs_sparse.data[i]
+            ia[1 + i] = int(1 + constr_coefs_coo.row[i])
+            ja[1 + i] = int(1 + constr_coefs_coo.col[i])
+            ar[1 + i] = constr_coefs_coo.data[i]
         glpk.glp_load_matrix(self.lp, n_coefs, ia, ja, ar)
 
         # configure options
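As a worked example of the 1-based loading convention: the 2x3 matrix [[0, 2, 0], [1, 0, 3]] has COO entries (row, col, data) = (0, 1, 2.0), (1, 0, 1.0), (1, 2, 3.0), which are loaded as ia = [1, 2, 2], ja = [2, 1, 3], ar = [2.0, 1.0, 3.0], with each index shifted by +1 and stored starting at array position 1.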
@@ -307,7 +304,7 @@ class LpProblem:
     def __del__(self):
         """Explicitly delete the LpProblem. (can we do without?)
         """
-        print('deletion of LpProblem')
+        # print('deletion of LpProblem')
         assert self.lp is not None
         if getattr(self, 'lp'):
             glpk.glp_delete_prob(self.lp)