diff --git a/modelpruner/core/model_pruner.py b/modelpruner/core/model_pruner.py index 742cf3ef6961b09fc882ef92ca5a5465de658fef..806ac249b937de7ef2666b2153cd6d53efa2fcec 100644 --- a/modelpruner/core/model_pruner.py +++ b/modelpruner/core/model_pruner.py @@ -4,43 +4,32 @@ import sys import os import re import time +import copy import numpy as np +import multiprocessing as mp import pickle -import sbmlxdf from modelpruner.models.fba_model import FbaModel from modelpruner.core.protected_parts import ProtectedParts +global _fba_model + class ModelPruner: def __init__(self, sbml_fname, protected_parts, reduced_fname=None, resume=False): + self.tolerance = 1e-8 # later accessible by set_params() ? + self.cpus = int(os.cpu_count()*.8) + if reduced_fname is None: self.reduced_fname = re.sub(r'.xml$', '_red.xml', sbml_fname) self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname) self._snapshot_pkl = re.sub(r'.xml$', '_snapshot.pkl', sbml_fname) if resume is True: - if os.path.exists(self._snapshot_sbml): - self.sbml_model = sbmlxdf.Model(self._snapshot_sbml) - print(f'Resume from snapshot from {self._snapshot_sbml} ' - f'(last modified: {time.ctime(os.path.getmtime(self._snapshot_sbml))})') - else: - print(f'{self._snapshot_sbml} not found!') - raise FileNotFoundError - else: - if os.path.exists(sbml_fname): - self.sbml_model = sbmlxdf.Model(sbml_fname) - print(f'Full SBML model loaded from {sbml_fname} ' - f'(last modified: {time.ctime(os.path.getmtime(sbml_fname))})') - else: - print(f'{sbml_fname} not found!') - raise FileNotFoundError - - self.tolerance = 1e-8 # later accessible by set_params() ? - - self.fba_model = FbaModel(self.sbml_model) + sbml_fname = self._snapshot_sbml + self.fba_model = FbaModel(sbml_fname) self.nrp = ProtectedParts(protected_parts) self.protected_sids = self.nrp.initial_protected_sids @@ -139,7 +128,6 @@ class ModelPruner: print('no more reactions to remove') # store intermediate results (snapshots) - print(f'snapshot check next: {next_snapshot}, rids: {len(self.fba_model.rids)}') if (next_snapshot is not None) and (len(self.fba_model.rids) < next_snapshot): self.export_pruned_model(self._snapshot_sbml) with open(self._snapshot_pkl, 'wb') as f: @@ -160,45 +148,72 @@ class ModelPruner: if os.path.exists(fname): os.remove(fname) - def reaction_types_fva(self, free_rids): + def init_worker(self): + global _fba_model + _fba_model = copy.deepcopy(self.fba_model) - flux_min_pfs = np.zeros((len(self.nrp.protected_functions), len(free_rids))) - flux_max_pfs = np.zeros((len(self.nrp.protected_functions), len(free_rids))) + @staticmethod + def fva_single_rids(rid): + return _fba_model.fva_single_rid(rid) + def reaction_types_fva(self, free_rids): + + print(time.strftime("%H:%M:%S", time.localtime())) n_pf = len(self.nrp.protected_functions) + flux_min_pfs = np.zeros((n_pf, len(free_rids))) + flux_max_pfs = np.zeros((n_pf, len(free_rids))) + + processes = min(self.cpus, len(free_rids)) for idx, pf in enumerate(self.nrp.protected_functions.values()): self.set_temp_func_conditions(pf) - res = self.fba_model.fva_optimize(free_rids, fract_of_optimum=0.0) + if processes > 20: + # place to split fva for free_rids over several + rid2idx = {rid: idx for idx, rid in enumerate(free_rids)} + pool = mp.Pool(processes, initializer=self.init_worker) + for rid, res in pool.imap_unordered(self.fva_single_rids, free_rids): + flux_min_pfs[idx, rid2idx[rid]] = res['min'] + flux_max_pfs[idx, rid2idx[rid]] = res['max'] + pool.close() + pool.join() + else: + res = self.fba_model.fva_optimize(free_rids, fract_of_optimum=0.0) + flux_min_pfs[idx] = res['min'] + flux_max_pfs[idx] = res['max'] + self.restore_base_conditions() - if res['success'] is False: - print('FVA unsuccessful') - return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []} - flux_min_pfs[idx] = res['min'] - flux_max_pfs[idx] = res['max'] + # if res['success'] is False: + # print('FVA unsuccessful') + # return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []} + sys.stdout.write('\rFVA[{}{}] {:3.0f}%'.format( '=' * int(idx+1), ' ' * int(n_pf - idx-1), (idx+1)/n_pf*100)) sys.stdout.flush() print() + # analyze flux ranges per reaction, aggregate fluxes flux_min_pfs[np.abs(flux_min_pfs) < self.tolerance] = 0.0 flux_max_pfs[np.abs(flux_max_pfs) < self.tolerance] = 0.0 flux_min = np.min(flux_min_pfs, axis=0) flux_max = np.max(flux_max_pfs, axis=0) + # blocked reactions do not carry any flux in all conditions mask_b = np.all(np.vstack((flux_min == 0, flux_max == 0)), axis=0) blocked_rids = set(np.array(free_rids)[mask_b]) + # essential reactions carry either only forward or reverse flux, exluding zero flux mask_e1 = np.all(np.vstack((flux_min < 0, flux_max < 0)), axis=0) mask_e2 = np.all(np.vstack((flux_min > 0, flux_max > 0)), axis=0) mask_e = np.any(np.vstack((mask_e1, mask_e2)), axis=0) essential_rids = set(np.array(free_rids)[mask_e]) + # reactions that support zero flux are considered candiates for removal + # here such reactions are sorted from smallest to largest flux span mask_c = np.all(np.vstack((np.logical_not(mask_b), np.logical_not(mask_e))), axis=0) flux_span = flux_max - flux_min ind = np.argsort(flux_span) - candidate_rids_sorted = list(np.array(free_rids)[ind][np.array(mask_c)[ind]]) + return {'blocked_rids': blocked_rids, 'essential_rids': essential_rids, 'candidate_rids_sorted': candidate_rids_sorted} diff --git a/modelpruner/models/fba_model.py b/modelpruner/models/fba_model.py index 3496c3a91e202abc32f91a77c5397a14893ef476..1613be9ee1d4611b857a9e716e276987d0abd044 100644 --- a/modelpruner/models/fba_model.py +++ b/modelpruner/models/fba_model.py @@ -1,5 +1,7 @@ +import os +import time import re import math import numpy as np @@ -10,12 +12,21 @@ from modelpruner.problems.lp_problem import LpProblem class FbaModel: - def __init__(self, sbml_model): + def __init__(self, sbml_fname): - self.model_dict = sbml_model.to_df() - df_reactions = self.model_dict['reactions'] - df_species = self.model_dict['species'] - df_fbc_objectives = self.model_dict['fbcObjectives'] + self.sbml_fname = sbml_fname + if os.path.exists(sbml_fname): + sbml_model = sbmlxdf.Model(sbml_fname) + print(f'SBML model loaded from {sbml_fname} ' + f'(last modified: {time.ctime(os.path.getmtime(sbml_fname))})') + else: + print(f'{sbml_fname} not found!') + raise FileNotFoundError + + model_dict = sbml_model.to_df() + df_reactions = model_dict['reactions'] + df_species = model_dict['species'] + df_fbc_objectives = model_dict['fbcObjectives'] self.full_model_shape = {'n_full_rids': len(df_reactions), 'n_full_sids': len(df_species)} @@ -28,6 +39,11 @@ class FbaModel: df_reactions['fbcUb'].to_numpy())) self.s_mat = sbml_model.get_s_matrix().to_numpy() + # TODO: create LP Problem only once (but what is impact on deepcopy() + # function to assign new LP problem + # TODO: delete rows / cols in LP problem on removal of rids, sids + # TODO: prior to solve, add objective, thereafter immediatly remove objective + self.objective = {} for oid, row in df_fbc_objectives.iterrows(): if row['active'] is True: @@ -135,6 +151,7 @@ class FbaModel: rids = self.rids flux_min = np.zeros(len(rids)) flux_max = np.zeros(len(rids)) + # fix FBA objective as constraint fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible])) if self.obj_dir == 'maximize': @@ -164,6 +181,53 @@ class FbaModel: del lp return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max} + def fva_single_rid(self, rids): + """FVA - flux variability analysis for all or selected reactions. + + :param rids: selected reaction ids to run FVA (alternatively, all) + :type rids: list of strings (default: None - all reactions) + :return: rid, min and max flux values + :rtype: dict + """ + lp = self.create_fba_lp() + if lp is None: + return {'success': False, 'message': 'not an FBA model'} + res = lp.solve(short_result=True) + + if res['success'] is False: + del lp + return {'success': False, 'message': res['generic_status']} + + if rids is None: + rids = self.rids + flux_min = np.zeros(len(rids)) + flux_max = np.zeros(len(rids)) + # fix FBA objective as constraint + fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible])) + if self.obj_dir == 'maximize': + row_bds = np.array([0.0, np.nan]) + else: + row_bds = np.array([np.nan, 1000.0]) + lp.add_constraint(fwd_bwd_obj_coefs, row_bds) + + n_cols = len(self.rids) + for idx, rid in enumerate(rids): + # select reaction maximize/minimize flux + new_obj_coefs = np.zeros(n_cols) + new_obj_coefs[self.rid2idx[rid]] = 1 + new_fwd_bwd_obj_coefs = np.hstack((new_obj_coefs, -new_obj_coefs[self.reversible])) + lp.set_obj_coefs(new_fwd_bwd_obj_coefs) + + lp.set_obj_dir('minimize') + res = lp.solve(short_result=True) + flux_min[idx] = res['fun'] if res['success'] else np.nan + + lp.set_obj_dir('maximize') + res = lp.solve(short_result=True) + flux_max[idx] = res['fun'] if res['success'] else np.nan + del lp + return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max} + def update_flux_bounds(self, flux_bounds): """selectivly set lower/upper flux bounds for given reactions. @@ -269,9 +333,11 @@ class FbaModel: def export_pruned_model(self, pruned_sbml): - pruned_mdict = self.model_dict.copy() - pruned_mdict['reactions'] = self.model_dict['reactions'].loc[self.rids] - pruned_mdict['species'] = self.model_dict['species'].loc[self.sids] + sbml_model = sbmlxdf.Model(self.sbml_fname) + model_dict = sbml_model.to_df() + pruned_mdict = model_dict.copy() + pruned_mdict['reactions'] = model_dict['reactions'].loc[self.rids] + pruned_mdict['species'] = model_dict['species'].loc[self.sids] # update model attributes (ids, names and modification date) pruned_mdict['modelAttrs']['id'] += '_pruned' diff --git a/modelpruner/problems/lp_problem.py b/modelpruner/problems/lp_problem.py index 3bc4932e6f4815dca41e7ce4d16b57834c255765..e346a33011f0aaf3a223a29d2efed89d1b214495 100644 --- a/modelpruner/problems/lp_problem.py +++ b/modelpruner/problems/lp_problem.py @@ -258,6 +258,7 @@ class LpProblem: glpk.glp_scale_prob(self.lp, opt) # sjj = [glpk.glp_get_sjj(self.lp, 1+i) for i in range(glpk.glp_get_num_cols(self.lp))] # rii = [glpk.glp_get_rii(self.lp, 1+i) for i in range(glpk.glp_get_num_rows(self.lp))] + # TODO: glpk.glp_adv_basis(self.lp, 0) simplex_result = glpk.glp_simplex(self.lp, smcp) return self.results(simplex_result, short_result)