Skip to content
Snippets Groups Projects
Commit 0a9159d6 authored by Peter Schubert's avatar Peter Schubert
Browse files

start implementing mp

parent 50ebc192
No related branches found
No related tags found
No related merge requests found
...@@ -4,43 +4,32 @@ import sys ...@@ -4,43 +4,32 @@ import sys
import os import os
import re import re
import time import time
import copy
import numpy as np import numpy as np
import multiprocessing as mp
import pickle import pickle
import sbmlxdf
from modelpruner.models.fba_model import FbaModel from modelpruner.models.fba_model import FbaModel
from modelpruner.core.protected_parts import ProtectedParts from modelpruner.core.protected_parts import ProtectedParts
global _fba_model
class ModelPruner: class ModelPruner:
def __init__(self, sbml_fname, protected_parts, reduced_fname=None, resume=False): def __init__(self, sbml_fname, protected_parts, reduced_fname=None, resume=False):
self.tolerance = 1e-8 # later accessible by set_params() ?
self.cpus = int(os.cpu_count()*.8)
if reduced_fname is None: if reduced_fname is None:
self.reduced_fname = re.sub(r'.xml$', '_red.xml', sbml_fname) self.reduced_fname = re.sub(r'.xml$', '_red.xml', sbml_fname)
self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname) self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname)
self._snapshot_pkl = re.sub(r'.xml$', '_snapshot.pkl', sbml_fname) self._snapshot_pkl = re.sub(r'.xml$', '_snapshot.pkl', sbml_fname)
if resume is True: if resume is True:
if os.path.exists(self._snapshot_sbml): sbml_fname = self._snapshot_sbml
self.sbml_model = sbmlxdf.Model(self._snapshot_sbml) self.fba_model = FbaModel(sbml_fname)
print(f'Resume from snapshot from {self._snapshot_sbml} '
f'(last modified: {time.ctime(os.path.getmtime(self._snapshot_sbml))})')
else:
print(f'{self._snapshot_sbml} not found!')
raise FileNotFoundError
else:
if os.path.exists(sbml_fname):
self.sbml_model = sbmlxdf.Model(sbml_fname)
print(f'Full SBML model loaded from {sbml_fname} '
f'(last modified: {time.ctime(os.path.getmtime(sbml_fname))})')
else:
print(f'{sbml_fname} not found!')
raise FileNotFoundError
self.tolerance = 1e-8 # later accessible by set_params() ?
self.fba_model = FbaModel(self.sbml_model)
self.nrp = ProtectedParts(protected_parts) self.nrp = ProtectedParts(protected_parts)
self.protected_sids = self.nrp.initial_protected_sids self.protected_sids = self.nrp.initial_protected_sids
...@@ -139,7 +128,6 @@ class ModelPruner: ...@@ -139,7 +128,6 @@ class ModelPruner:
print('no more reactions to remove') print('no more reactions to remove')
# store intermediate results (snapshots) # store intermediate results (snapshots)
print(f'snapshot check next: {next_snapshot}, rids: {len(self.fba_model.rids)}')
if (next_snapshot is not None) and (len(self.fba_model.rids) < next_snapshot): if (next_snapshot is not None) and (len(self.fba_model.rids) < next_snapshot):
self.export_pruned_model(self._snapshot_sbml) self.export_pruned_model(self._snapshot_sbml)
with open(self._snapshot_pkl, 'wb') as f: with open(self._snapshot_pkl, 'wb') as f:
...@@ -160,45 +148,72 @@ class ModelPruner: ...@@ -160,45 +148,72 @@ class ModelPruner:
if os.path.exists(fname): if os.path.exists(fname):
os.remove(fname) os.remove(fname)
def reaction_types_fva(self, free_rids): def init_worker(self):
global _fba_model
_fba_model = copy.deepcopy(self.fba_model)
@staticmethod
def fva_single_rids(rid):
return _fba_model.fva_single_rid(rid)
flux_min_pfs = np.zeros((len(self.nrp.protected_functions), len(free_rids))) def reaction_types_fva(self, free_rids):
flux_max_pfs = np.zeros((len(self.nrp.protected_functions), len(free_rids)))
print(time.strftime("%H:%M:%S", time.localtime()))
n_pf = len(self.nrp.protected_functions) n_pf = len(self.nrp.protected_functions)
flux_min_pfs = np.zeros((n_pf, len(free_rids)))
flux_max_pfs = np.zeros((n_pf, len(free_rids)))
processes = min(self.cpus, len(free_rids))
for idx, pf in enumerate(self.nrp.protected_functions.values()): for idx, pf in enumerate(self.nrp.protected_functions.values()):
self.set_temp_func_conditions(pf) self.set_temp_func_conditions(pf)
if processes > 20:
# place to split fva for free_rids over several
rid2idx = {rid: idx for idx, rid in enumerate(free_rids)}
pool = mp.Pool(processes, initializer=self.init_worker)
for rid, res in pool.imap_unordered(self.fva_single_rids, free_rids):
flux_min_pfs[idx, rid2idx[rid]] = res['min']
flux_max_pfs[idx, rid2idx[rid]] = res['max']
pool.close()
pool.join()
else:
res = self.fba_model.fva_optimize(free_rids, fract_of_optimum=0.0) res = self.fba_model.fva_optimize(free_rids, fract_of_optimum=0.0)
self.restore_base_conditions()
if res['success'] is False:
print('FVA unsuccessful')
return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []}
flux_min_pfs[idx] = res['min'] flux_min_pfs[idx] = res['min']
flux_max_pfs[idx] = res['max'] flux_max_pfs[idx] = res['max']
self.restore_base_conditions()
# if res['success'] is False:
# print('FVA unsuccessful')
# return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []}
sys.stdout.write('\rFVA[{}{}] {:3.0f}%'.format( sys.stdout.write('\rFVA[{}{}] {:3.0f}%'.format(
'=' * int(idx+1), ' ' * int(n_pf - idx-1), (idx+1)/n_pf*100)) '=' * int(idx+1), ' ' * int(n_pf - idx-1), (idx+1)/n_pf*100))
sys.stdout.flush() sys.stdout.flush()
print() print()
# analyze flux ranges per reaction, aggregate fluxes
flux_min_pfs[np.abs(flux_min_pfs) < self.tolerance] = 0.0 flux_min_pfs[np.abs(flux_min_pfs) < self.tolerance] = 0.0
flux_max_pfs[np.abs(flux_max_pfs) < self.tolerance] = 0.0 flux_max_pfs[np.abs(flux_max_pfs) < self.tolerance] = 0.0
flux_min = np.min(flux_min_pfs, axis=0) flux_min = np.min(flux_min_pfs, axis=0)
flux_max = np.max(flux_max_pfs, axis=0) flux_max = np.max(flux_max_pfs, axis=0)
# blocked reactions do not carry any flux in all conditions
mask_b = np.all(np.vstack((flux_min == 0, flux_max == 0)), axis=0) mask_b = np.all(np.vstack((flux_min == 0, flux_max == 0)), axis=0)
blocked_rids = set(np.array(free_rids)[mask_b]) blocked_rids = set(np.array(free_rids)[mask_b])
# essential reactions carry either only forward or reverse flux, exluding zero flux
mask_e1 = np.all(np.vstack((flux_min < 0, flux_max < 0)), axis=0) mask_e1 = np.all(np.vstack((flux_min < 0, flux_max < 0)), axis=0)
mask_e2 = np.all(np.vstack((flux_min > 0, flux_max > 0)), axis=0) mask_e2 = np.all(np.vstack((flux_min > 0, flux_max > 0)), axis=0)
mask_e = np.any(np.vstack((mask_e1, mask_e2)), axis=0) mask_e = np.any(np.vstack((mask_e1, mask_e2)), axis=0)
essential_rids = set(np.array(free_rids)[mask_e]) essential_rids = set(np.array(free_rids)[mask_e])
# reactions that support zero flux are considered candiates for removal
# here such reactions are sorted from smallest to largest flux span
mask_c = np.all(np.vstack((np.logical_not(mask_b), np.logical_not(mask_e))), axis=0) mask_c = np.all(np.vstack((np.logical_not(mask_b), np.logical_not(mask_e))), axis=0)
flux_span = flux_max - flux_min flux_span = flux_max - flux_min
ind = np.argsort(flux_span) ind = np.argsort(flux_span)
candidate_rids_sorted = list(np.array(free_rids)[ind][np.array(mask_c)[ind]]) candidate_rids_sorted = list(np.array(free_rids)[ind][np.array(mask_c)[ind]])
return {'blocked_rids': blocked_rids, 'essential_rids': essential_rids, return {'blocked_rids': blocked_rids, 'essential_rids': essential_rids,
'candidate_rids_sorted': candidate_rids_sorted} 'candidate_rids_sorted': candidate_rids_sorted}
......
import os
import time
import re import re
import math import math
import numpy as np import numpy as np
...@@ -10,12 +12,21 @@ from modelpruner.problems.lp_problem import LpProblem ...@@ -10,12 +12,21 @@ from modelpruner.problems.lp_problem import LpProblem
class FbaModel: class FbaModel:
def __init__(self, sbml_model): def __init__(self, sbml_fname):
self.model_dict = sbml_model.to_df() self.sbml_fname = sbml_fname
df_reactions = self.model_dict['reactions'] if os.path.exists(sbml_fname):
df_species = self.model_dict['species'] sbml_model = sbmlxdf.Model(sbml_fname)
df_fbc_objectives = self.model_dict['fbcObjectives'] print(f'SBML model loaded from {sbml_fname} '
f'(last modified: {time.ctime(os.path.getmtime(sbml_fname))})')
else:
print(f'{sbml_fname} not found!')
raise FileNotFoundError
model_dict = sbml_model.to_df()
df_reactions = model_dict['reactions']
df_species = model_dict['species']
df_fbc_objectives = model_dict['fbcObjectives']
self.full_model_shape = {'n_full_rids': len(df_reactions), self.full_model_shape = {'n_full_rids': len(df_reactions),
'n_full_sids': len(df_species)} 'n_full_sids': len(df_species)}
...@@ -28,6 +39,11 @@ class FbaModel: ...@@ -28,6 +39,11 @@ class FbaModel:
df_reactions['fbcUb'].to_numpy())) df_reactions['fbcUb'].to_numpy()))
self.s_mat = sbml_model.get_s_matrix().to_numpy() self.s_mat = sbml_model.get_s_matrix().to_numpy()
# TODO: create LP Problem only once (but what is impact on deepcopy()
# function to assign new LP problem
# TODO: delete rows / cols in LP problem on removal of rids, sids
# TODO: prior to solve, add objective, thereafter immediatly remove objective
self.objective = {} self.objective = {}
for oid, row in df_fbc_objectives.iterrows(): for oid, row in df_fbc_objectives.iterrows():
if row['active'] is True: if row['active'] is True:
...@@ -135,6 +151,7 @@ class FbaModel: ...@@ -135,6 +151,7 @@ class FbaModel:
rids = self.rids rids = self.rids
flux_min = np.zeros(len(rids)) flux_min = np.zeros(len(rids))
flux_max = np.zeros(len(rids)) flux_max = np.zeros(len(rids))
# fix FBA objective as constraint # fix FBA objective as constraint
fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible])) fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible]))
if self.obj_dir == 'maximize': if self.obj_dir == 'maximize':
...@@ -164,6 +181,53 @@ class FbaModel: ...@@ -164,6 +181,53 @@ class FbaModel:
del lp del lp
return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max} return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max}
def fva_single_rid(self, rids):
"""FVA - flux variability analysis for all or selected reactions.
:param rids: selected reaction ids to run FVA (alternatively, all)
:type rids: list of strings (default: None - all reactions)
:return: rid, min and max flux values
:rtype: dict
"""
lp = self.create_fba_lp()
if lp is None:
return {'success': False, 'message': 'not an FBA model'}
res = lp.solve(short_result=True)
if res['success'] is False:
del lp
return {'success': False, 'message': res['generic_status']}
if rids is None:
rids = self.rids
flux_min = np.zeros(len(rids))
flux_max = np.zeros(len(rids))
# fix FBA objective as constraint
fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible]))
if self.obj_dir == 'maximize':
row_bds = np.array([0.0, np.nan])
else:
row_bds = np.array([np.nan, 1000.0])
lp.add_constraint(fwd_bwd_obj_coefs, row_bds)
n_cols = len(self.rids)
for idx, rid in enumerate(rids):
# select reaction maximize/minimize flux
new_obj_coefs = np.zeros(n_cols)
new_obj_coefs[self.rid2idx[rid]] = 1
new_fwd_bwd_obj_coefs = np.hstack((new_obj_coefs, -new_obj_coefs[self.reversible]))
lp.set_obj_coefs(new_fwd_bwd_obj_coefs)
lp.set_obj_dir('minimize')
res = lp.solve(short_result=True)
flux_min[idx] = res['fun'] if res['success'] else np.nan
lp.set_obj_dir('maximize')
res = lp.solve(short_result=True)
flux_max[idx] = res['fun'] if res['success'] else np.nan
del lp
return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max}
def update_flux_bounds(self, flux_bounds): def update_flux_bounds(self, flux_bounds):
"""selectivly set lower/upper flux bounds for given reactions. """selectivly set lower/upper flux bounds for given reactions.
...@@ -269,9 +333,11 @@ class FbaModel: ...@@ -269,9 +333,11 @@ class FbaModel:
def export_pruned_model(self, pruned_sbml): def export_pruned_model(self, pruned_sbml):
pruned_mdict = self.model_dict.copy() sbml_model = sbmlxdf.Model(self.sbml_fname)
pruned_mdict['reactions'] = self.model_dict['reactions'].loc[self.rids] model_dict = sbml_model.to_df()
pruned_mdict['species'] = self.model_dict['species'].loc[self.sids] pruned_mdict = model_dict.copy()
pruned_mdict['reactions'] = model_dict['reactions'].loc[self.rids]
pruned_mdict['species'] = model_dict['species'].loc[self.sids]
# update model attributes (ids, names and modification date) # update model attributes (ids, names and modification date)
pruned_mdict['modelAttrs']['id'] += '_pruned' pruned_mdict['modelAttrs']['id'] += '_pruned'
......
...@@ -258,6 +258,7 @@ class LpProblem: ...@@ -258,6 +258,7 @@ class LpProblem:
glpk.glp_scale_prob(self.lp, opt) glpk.glp_scale_prob(self.lp, opt)
# sjj = [glpk.glp_get_sjj(self.lp, 1+i) for i in range(glpk.glp_get_num_cols(self.lp))] # sjj = [glpk.glp_get_sjj(self.lp, 1+i) for i in range(glpk.glp_get_num_cols(self.lp))]
# rii = [glpk.glp_get_rii(self.lp, 1+i) for i in range(glpk.glp_get_num_rows(self.lp))] # rii = [glpk.glp_get_rii(self.lp, 1+i) for i in range(glpk.glp_get_num_rows(self.lp))]
# TODO: glpk.glp_adv_basis(self.lp, 0)
simplex_result = glpk.glp_simplex(self.lp, smcp) simplex_result = glpk.glp_simplex(self.lp, smcp)
return self.results(simplex_result, short_result) return self.results(simplex_result, short_result)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment