Commit 0a9159d6 authored by Peter Schubert

start implementing mp

parent 50ebc192
@@ -4,43 +4,32 @@ import sys
import os
import re
import time
import copy
import numpy as np
import multiprocessing as mp
import pickle
import sbmlxdf
from modelpruner.models.fba_model import FbaModel
from modelpruner.core.protected_parts import ProtectedParts
global _fba_model
class ModelPruner:
def __init__(self, sbml_fname, protected_parts, reduced_fname=None, resume=False):
self.tolerance = 1e-8 # later accessible by set_params() ?
self.cpus = int(os.cpu_count()*.8)
if reduced_fname is None:
self.reduced_fname = re.sub(r'.xml$', '_red.xml', sbml_fname)
self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname)
self._snapshot_pkl = re.sub(r'.xml$', '_snapshot.pkl', sbml_fname)
if resume is True:
if os.path.exists(self._snapshot_sbml):
self.sbml_model = sbmlxdf.Model(self._snapshot_sbml)
print(f'Resume from snapshot from {self._snapshot_sbml} '
f'(last modified: {time.ctime(os.path.getmtime(self._snapshot_sbml))})')
else:
print(f'{self._snapshot_sbml} not found!')
raise FileNotFoundError
else:
if os.path.exists(sbml_fname):
self.sbml_model = sbmlxdf.Model(sbml_fname)
print(f'Full SBML model loaded from {sbml_fname} '
f'(last modified: {time.ctime(os.path.getmtime(sbml_fname))})')
else:
print(f'{sbml_fname} not found!')
raise FileNotFoundError
self.tolerance = 1e-8 # later accessible by set_params() ?
self.fba_model = FbaModel(self.sbml_model)
sbml_fname = self._snapshot_sbml
self.fba_model = FbaModel(sbml_fname)
self.nrp = ProtectedParts(protected_parts)
self.protected_sids = self.nrp.initial_protected_sids
@@ -139,7 +128,6 @@ class ModelPruner:
print('no more reactions to remove')
# store intermediate results (snapshots)
print(f'snapshot check next: {next_snapshot}, rids: {len(self.fba_model.rids)}')
if (next_snapshot is not None) and (len(self.fba_model.rids) < next_snapshot):
self.export_pruned_model(self._snapshot_sbml)
with open(self._snapshot_pkl, 'wb') as f:
@@ -160,45 +148,72 @@ class ModelPruner:
if os.path.exists(fname):
os.remove(fname)
def reaction_types_fva(self, free_rids):
def init_worker(self):
global _fba_model
_fba_model = copy.deepcopy(self.fba_model)
@staticmethod
def fva_single_rids(rid):
# run FVA for one reaction inside the worker process; return the reaction id
# together with the result, so imap_unordered output can be matched up
return rid, _fba_model.fva_single_rid([rid])
flux_min_pfs = np.zeros((len(self.nrp.protected_functions), len(free_rids)))
flux_max_pfs = np.zeros((len(self.nrp.protected_functions), len(free_rids)))
def reaction_types_fva(self, free_rids):
print(time.strftime("%H:%M:%S", time.localtime()))
n_pf = len(self.nrp.protected_functions)
flux_min_pfs = np.zeros((n_pf, len(free_rids)))
flux_max_pfs = np.zeros((n_pf, len(free_rids)))
processes = min(self.cpus, len(free_rids))
for idx, pf in enumerate(self.nrp.protected_functions.values()):
self.set_temp_func_conditions(pf)
if processes > 20:
# place to split the FVA over free_rids across several worker processes
rid2idx = {rid: idx for idx, rid in enumerate(free_rids)}
pool = mp.Pool(processes, initializer=self.init_worker)
for rid, res in pool.imap_unordered(self.fva_single_rids, free_rids):
flux_min_pfs[idx, rid2idx[rid]] = res['min']
flux_max_pfs[idx, rid2idx[rid]] = res['max']
pool.close()
pool.join()
else:
res = self.fba_model.fva_optimize(free_rids, fract_of_optimum=0.0)
self.restore_base_conditions()
if res['success'] is False:
print('FVA unsuccessful')
return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []}
flux_min_pfs[idx] = res['min']
flux_max_pfs[idx] = res['max']
self.restore_base_conditions()
# if res['success'] is False:
# print('FVA unsuccessful')
# return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []}
sys.stdout.write('\rFVA[{}{}] {:3.0f}%'.format(
'=' * int(idx+1), ' ' * int(n_pf - idx-1), (idx+1)/n_pf*100))
sys.stdout.flush()
print()
# analyze flux ranges per reaction, aggregate fluxes
flux_min_pfs[np.abs(flux_min_pfs) < self.tolerance] = 0.0
flux_max_pfs[np.abs(flux_max_pfs) < self.tolerance] = 0.0
flux_min = np.min(flux_min_pfs, axis=0)
flux_max = np.max(flux_max_pfs, axis=0)
# blocked reactions carry zero flux under all protected-function conditions
mask_b = np.all(np.vstack((flux_min == 0, flux_max == 0)), axis=0)
blocked_rids = set(np.array(free_rids)[mask_b])
# essential reactions carry either only forward or only reverse flux, excluding zero flux
mask_e1 = np.all(np.vstack((flux_min < 0, flux_max < 0)), axis=0)
mask_e2 = np.all(np.vstack((flux_min > 0, flux_max > 0)), axis=0)
mask_e = np.any(np.vstack((mask_e1, mask_e2)), axis=0)
essential_rids = set(np.array(free_rids)[mask_e])
# reactions that support zero flux are considered candidates for removal
# here such reactions are sorted from smallest to largest flux span
mask_c = np.all(np.vstack((np.logical_not(mask_b), np.logical_not(mask_e))), axis=0)
flux_span = flux_max - flux_min
ind = np.argsort(flux_span)
candidate_rids_sorted = list(np.array(free_rids)[ind][np.array(mask_c)[ind]])
return {'blocked_rids': blocked_rids, 'essential_rids': essential_rids,
'candidate_rids_sorted': candidate_rids_sorted}
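
For reference, a minimal standalone sketch of the worker-initializer pattern used in reaction_types_fva above. Apart from FbaModel.fva_single_rid, all names are illustrative, and the model is handed to the workers via initargs rather than via a bound method:

import multiprocessing as mp

_fba_model = None  # per-worker private model copy, set by _init_worker


def _init_worker(fba_model):
    # runs once in each worker process; the model arrives as its own
    # unpickled copy, so no state is shared between workers
    global _fba_model
    _fba_model = fba_model


def _fva_single(rid):
    # return the reaction id together with the FVA result so that
    # imap_unordered output can be matched back to the right reaction
    return rid, _fba_model.fva_single_rid([rid])


def parallel_fva(fba_model, free_rids, processes):
    results = {}
    with mp.Pool(processes, initializer=_init_worker, initargs=(fba_model,)) as pool:
        for rid, res in pool.imap_unordered(_fva_single, free_rids):
            results[rid] = res
    return results

This requires FbaModel to be picklable, which the bound-method initializer in the diff also implies, since self (including self.fba_model) is pickled into each worker.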
modelpruner/models/fba_model.py
import os
import time
import re
import math
import numpy as np
@@ -10,12 +12,21 @@ from modelpruner.problems.lp_problem import LpProblem
class FbaModel:
def __init__(self, sbml_model):
def __init__(self, sbml_fname):
self.model_dict = sbml_model.to_df()
df_reactions = self.model_dict['reactions']
df_species = self.model_dict['species']
df_fbc_objectives = self.model_dict['fbcObjectives']
self.sbml_fname = sbml_fname
if os.path.exists(sbml_fname):
sbml_model = sbmlxdf.Model(sbml_fname)
print(f'SBML model loaded from {sbml_fname} '
f'(last modified: {time.ctime(os.path.getmtime(sbml_fname))})')
else:
print(f'{sbml_fname} not found!')
raise FileNotFoundError
model_dict = sbml_model.to_df()
df_reactions = model_dict['reactions']
df_species = model_dict['species']
df_fbc_objectives = model_dict['fbcObjectives']
self.full_model_shape = {'n_full_rids': len(df_reactions),
'n_full_sids': len(df_species)}
@@ -28,6 +39,11 @@ class FbaModel:
df_reactions['fbcUb'].to_numpy()))
self.s_mat = sbml_model.get_s_matrix().to_numpy()
# TODO: create the LP problem only once (but what is the impact on deepcopy()?)
#       and provide a function to assign a new LP problem
# TODO: delete rows / cols in the LP problem on removal of rids, sids
# TODO: prior to solve, add the objective; immediately after solving, remove it again
self.objective = {}
for oid, row in df_fbc_objectives.iterrows():
if row['active'] is True:
@@ -135,6 +151,7 @@ class FbaModel:
rids = self.rids
flux_min = np.zeros(len(rids))
flux_max = np.zeros(len(rids))
# fix FBA objective as constraint
fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible]))
if self.obj_dir == 'maximize':
@@ -164,6 +181,53 @@ class FbaModel:
del lp
return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max}
def fva_single_rid(self, rids):
"""FVA - flux variability analysis for all or selected reactions.
:param rids: selected reaction ids to run FVA (alternatively, all)
:type rids: list of strings (default: None - all reactions)
:return: rid, min and max flux values
:rtype: dict
"""
lp = self.create_fba_lp()
if lp is None:
return {'success': False, 'message': 'not an FBA model'}
res = lp.solve(short_result=True)
if res['success'] is False:
del lp
return {'success': False, 'message': res['generic_status']}
if rids is None:
rids = self.rids
flux_min = np.zeros(len(rids))
flux_max = np.zeros(len(rids))
# fix FBA objective as constraint
fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible]))
if self.obj_dir == 'maximize':
row_bds = np.array([0.0, np.nan])
else:
row_bds = np.array([np.nan, 1000.0])
lp.add_constraint(fwd_bwd_obj_coefs, row_bds)
n_cols = len(self.rids)
for idx, rid in enumerate(rids):
# select reaction maximize/minimize flux
new_obj_coefs = np.zeros(n_cols)
new_obj_coefs[self.rid2idx[rid]] = 1
new_fwd_bwd_obj_coefs = np.hstack((new_obj_coefs, -new_obj_coefs[self.reversible]))
lp.set_obj_coefs(new_fwd_bwd_obj_coefs)
lp.set_obj_dir('minimize')
res = lp.solve(short_result=True)
flux_min[idx] = res['fun'] if res['success'] else np.nan
lp.set_obj_dir('maximize')
res = lp.solve(short_result=True)
flux_max[idx] = res['fun'] if res['success'] else np.nan
del lp
return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max}
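
A minimal usage sketch of fva_single_rid, assuming an FbaModel built from an SBML file as in __init__ above; the file name and reaction ids are placeholders:

from modelpruner.models.fba_model import FbaModel

fba_model = FbaModel('e_coli_core.xml')             # placeholder SBML file
res = fba_model.fva_single_rid(['R_PGI', 'R_PFK'])  # placeholder reaction ids
if res['success']:
    for rid, lo, hi in zip(res['rids'], res['min'], res['max']):
        print(f'{rid}: flux range [{lo:.6g}, {hi:.6g}]')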
def update_flux_bounds(self, flux_bounds):
"""selectivly set lower/upper flux bounds for given reactions.
@@ -269,9 +333,11 @@ class FbaModel:
def export_pruned_model(self, pruned_sbml):
pruned_mdict = self.model_dict.copy()
pruned_mdict['reactions'] = self.model_dict['reactions'].loc[self.rids]
pruned_mdict['species'] = self.model_dict['species'].loc[self.sids]
sbml_model = sbmlxdf.Model(self.sbml_fname)
model_dict = sbml_model.to_df()
pruned_mdict = model_dict.copy()
pruned_mdict['reactions'] = model_dict['reactions'].loc[self.rids]
pruned_mdict['species'] = model_dict['species'].loc[self.sids]
# update model attributes (ids, names and modification date)
pruned_mdict['modelAttrs']['id'] += '_pruned'
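
The row selection above uses plain pandas label indexing; a tiny self-contained illustration with toy data (not taken from the model):

import pandas as pd

df_reactions = pd.DataFrame({'fbcLb': [-10.0, 0.0, 0.0]}, index=['R1', 'R2', 'R3'])
kept_rids = ['R1', 'R3']              # toy example: rids that survived pruning
pruned = df_reactions.loc[kept_rids]  # keeps only the rows with these index labels
print(pruned.index.tolist())          # ['R1', 'R3']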
modelpruner/problems/lp_problem.py
@@ -258,6 +258,7 @@ class LpProblem:
glpk.glp_scale_prob(self.lp, opt)
# sjj = [glpk.glp_get_sjj(self.lp, 1+i) for i in range(glpk.glp_get_num_cols(self.lp))]
# rii = [glpk.glp_get_rii(self.lp, 1+i) for i in range(glpk.glp_get_num_rows(self.lp))]
# TODO: glpk.glp_adv_basis(self.lp, 0)
simplex_result = glpk.glp_simplex(self.lp, smcp)
return self.results(simplex_result, short_result)