start implementing mp

0a9159d6 · Peter Schubert · 50ebc192 · 0a9159d6 · 0a9159d6 · 0a9159d6
Commit 0a9159d6 authored 2 years ago by Peter Schubert
--- a/modelpruner/core/model_pruner.py
+++ b/modelpruner/core/model_pruner.py
@@ -4,43 +4,32 @@ import sys
 import os
 import re
 import time
+import copy
 import numpy as np
+import multiprocessing as mp
 import pickle
-import sbmlxdf
 from modelpruner.models.fba_model import FbaModel
 from modelpruner.core.protected_parts import ProtectedParts
+global _fba_model
 class ModelPruner:
    def __init__(self, sbml_fname, protected_parts, reduced_fname=None, resume=False):
+        self.tolerance = 1e-8   # later accessible by set_params() ?
+        self.cpus = int(os.cpu_count()*.8)
        if reduced_fname is None:
            self.reduced_fname = re.sub(r'.xml$', '_red.xml', sbml_fname)
        self._snapshot_sbml = re.sub(r'.xml$', '_snapshot.xml', sbml_fname)
        self._snapshot_pkl = re.sub(r'.xml$', '_snapshot.pkl', sbml_fname)
        if resume is True:
-            if os.path.exists(self._snapshot_sbml):
+            sbml_fname = self._snapshot_sbml
-                self.sbml_model = sbmlxdf.Model(self._snapshot_sbml)
+        self.fba_model = FbaModel(sbml_fname)
-                print(f'Resume from snapshot from {self._snapshot_sbml} '
-                      f'(last modified: {time.ctime(os.path.getmtime(self._snapshot_sbml))})')
-            else:
-                print(f'{self._snapshot_sbml} not found!')
-                raise FileNotFoundError
-        else:
-            if os.path.exists(sbml_fname):
-                self.sbml_model = sbmlxdf.Model(sbml_fname)
-                print(f'Full SBML model loaded from {sbml_fname} '
-                      f'(last modified: {time.ctime(os.path.getmtime(sbml_fname))})')
-            else:
-                print(f'{sbml_fname} not found!')
-                raise FileNotFoundError
-        self.tolerance = 1e-8   # later accessible by set_params() ?
-        self.fba_model = FbaModel(self.sbml_model)
        self.nrp = ProtectedParts(protected_parts)
        self.protected_sids = self.nrp.initial_protected_sids
@@ -139,7 +128,6 @@ class ModelPruner:
                print('no more reactions to remove')
            # store intermediate results (snapshots)
-            print(f'snapshot check next: {next_snapshot}, rids: {len(self.fba_model.rids)}')
            if (next_snapshot is not None) and (len(self.fba_model.rids) < next_snapshot):
                self.export_pruned_model(self._snapshot_sbml)
                with open(self._snapshot_pkl, 'wb') as f:
@@ -160,45 +148,72 @@ class ModelPruner:
            if os.path.exists(fname):
                os.remove(fname)
-    def reaction_types_fva(self, free_rids):
+    def init_worker(self):
+        global _fba_model
+        _fba_model = copy.deepcopy(self.fba_model)
+    @staticmethod
+    def fva_single_rids(rid):
+        return _fba_model.fva_single_rid(rid)
-        flux_min_pfs = np.zeros((len(self.nrp.protected_functions), len(free_rids)))
+    def reaction_types_fva(self, free_rids):
-        flux_max_pfs = np.zeros((len(self.nrp.protected_functions), len(free_rids)))
+        print(time.strftime("%H:%M:%S", time.localtime()))
        n_pf = len(self.nrp.protected_functions)
+        flux_min_pfs = np.zeros((n_pf, len(free_rids)))
+        flux_max_pfs = np.zeros((n_pf, len(free_rids)))
+        processes = min(self.cpus, len(free_rids))
        for idx, pf in enumerate(self.nrp.protected_functions.values()):
            self.set_temp_func_conditions(pf)
+            if processes > 20:
+                # place to split fva for free_rids over several processes
+                rid2idx = {rid: idx for idx, rid in enumerate(free_rids)}
+                pool = mp.Pool(processes, initializer=self.init_worker)
+                for rid, res in pool.imap_unordered(self.fva_single_rids, free_rids):
+                    flux_min_pfs[idx, rid2idx[rid]] = res['min']
+                    flux_max_pfs[idx, rid2idx[rid]] = res['max']
+                pool.close()
+                pool.join()
+            else:
                res = self.fba_model.fva_optimize(free_rids, fract_of_optimum=0.0)
-            self.restore_base_conditions()
-            if res['success'] is False:
-                print('FVA unsuccessful')
-                return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []}
                flux_min_pfs[idx] = res['min']
                flux_max_pfs[idx] = res['max']
+            self.restore_base_conditions()
+            # if res['success'] is False:
+            #    print('FVA unsuccessful')
+            #    return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []}
            sys.stdout.write('\rFVA[{}{}] {:3.0f}%'.format(
                '=' * int(idx+1), ' ' * int(n_pf - idx-1), (idx+1)/n_pf*100))
            sys.stdout.flush()
        print()
+        # analyze flux ranges per reaction, aggregate fluxes
        flux_min_pfs[np.abs(flux_min_pfs) < self.tolerance] = 0.0
        flux_max_pfs[np.abs(flux_max_pfs) < self.tolerance] = 0.0
        flux_min = np.min(flux_min_pfs, axis=0)
        flux_max = np.max(flux_max_pfs, axis=0)
+        # blocked reactions do not carry any flux in all conditions
        mask_b = np.all(np.vstack((flux_min == 0, flux_max == 0)), axis=0)
        blocked_rids = set(np.array(free_rids)[mask_b])
+        # essential reactions carry either only forward or only reverse flux, excluding zero flux
        mask_e1 = np.all(np.vstack((flux_min < 0, flux_max < 0)), axis=0)
        mask_e2 = np.all(np.vstack((flux_min > 0, flux_max > 0)), axis=0)
        mask_e = np.any(np.vstack((mask_e1, mask_e2)), axis=0)
        essential_rids = set(np.array(free_rids)[mask_e])
+        # reactions that support zero flux are considered candidates for removal
+        #  here such reactions are sorted from smallest to largest flux span
        mask_c = np.all(np.vstack((np.logical_not(mask_b), np.logical_not(mask_e))), axis=0)
        flux_span = flux_max - flux_min
        ind = np.argsort(flux_span)
        candidate_rids_sorted = list(np.array(free_rids)[ind][np.array(mask_c)[ind]])
        return {'blocked_rids': blocked_rids, 'essential_rids': essential_rids,
                'candidate_rids_sorted': candidate_rids_sorted}

--- a/modelpruner/models/fba_model.py
+++ b/modelpruner/models/fba_model.py
+import os
+import time
 import re
 import math
 import numpy as np
@@ -10,12 +12,21 @@ from modelpruner.problems.lp_problem import LpProblem
 class FbaModel:
-    def __init__(self, sbml_model):
+    def __init__(self, sbml_fname):
-        self.model_dict = sbml_model.to_df()
+        self.sbml_fname = sbml_fname
-        df_reactions = self.model_dict['reactions']
+        if os.path.exists(sbml_fname):
-        df_species = self.model_dict['species']
+            sbml_model = sbmlxdf.Model(sbml_fname)
-        df_fbc_objectives = self.model_dict['fbcObjectives']
+            print(f'SBML model loaded from {sbml_fname} '
+                  f'(last modified: {time.ctime(os.path.getmtime(sbml_fname))})')
+        else:
+            print(f'{sbml_fname} not found!')
+            raise FileNotFoundError
+        model_dict = sbml_model.to_df()
+        df_reactions = model_dict['reactions']
+        df_species = model_dict['species']
+        df_fbc_objectives = model_dict['fbcObjectives']
        self.full_model_shape = {'n_full_rids': len(df_reactions),
                                 'n_full_sids': len(df_species)}
@@ -28,6 +39,11 @@ class FbaModel:
                                      df_reactions['fbcUb'].to_numpy()))
        self.s_mat = sbml_model.get_s_matrix().to_numpy()
+        # TODO: create LP Problem only once (but what is impact on deepcopy()
+        #   function to assign new LP problem
+        # TODO: delete rows / cols in LP problem on removal of rids, sids
+        # TODO: prior to solve, add objective, thereafter immediately remove objective
        self.objective = {}
        for oid, row in df_fbc_objectives.iterrows():
            if row['active'] is True:
@@ -135,6 +151,7 @@ class FbaModel:
            rids = self.rids
        flux_min = np.zeros(len(rids))
        flux_max = np.zeros(len(rids))
        # fix FBA objective as constraint
        fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible]))
        if self.obj_dir == 'maximize':
@@ -164,6 +181,53 @@ class FbaModel:
        del lp
        return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max}
+    def fva_single_rid(self, rids):
+        """FVA - flux variability analysis for all or selected reactions.
+        :param rids: selected reaction ids to run FVA (alternatively, all)
+        :type rids: list of strings (default: None - all reactions)
+        :return: rid, min and max flux values
+        :rtype: dict
+        """
+        lp = self.create_fba_lp()
+        if lp is None:
+            return {'success': False, 'message': 'not an FBA model'}
+        res = lp.solve(short_result=True)
+        if res['success'] is False:
+            del lp
+            return {'success': False, 'message': res['generic_status']}
+        if rids is None:
+            rids = self.rids
+        flux_min = np.zeros(len(rids))
+        flux_max = np.zeros(len(rids))
+        # fix FBA objective as constraint
+        fwd_bwd_obj_coefs = np.hstack((self.obj_coefs, -self.obj_coefs[self.reversible]))
+        if self.obj_dir == 'maximize':
+            row_bds = np.array([0.0, np.nan])
+        else:
+            row_bds = np.array([np.nan, 1000.0])
+        lp.add_constraint(fwd_bwd_obj_coefs, row_bds)
+        n_cols = len(self.rids)
+        for idx, rid in enumerate(rids):
+            # select reaction maximize/minimize flux
+            new_obj_coefs = np.zeros(n_cols)
+            new_obj_coefs[self.rid2idx[rid]] = 1
+            new_fwd_bwd_obj_coefs = np.hstack((new_obj_coefs, -new_obj_coefs[self.reversible]))
+            lp.set_obj_coefs(new_fwd_bwd_obj_coefs)
+            lp.set_obj_dir('minimize')
+            res = lp.solve(short_result=True)
+            flux_min[idx] = res['fun'] if res['success'] else np.nan
+            lp.set_obj_dir('maximize')
+            res = lp.solve(short_result=True)
+            flux_max[idx] = res['fun'] if res['success'] else np.nan
+        del lp
+        return {'success': True, 'rids': np.array(rids), 'min': flux_min, 'max': flux_max}
    def update_flux_bounds(self, flux_bounds):
        """Selectively set lower/upper flux bounds for given reactions.
@@ -269,9 +333,11 @@ class FbaModel:
    def export_pruned_model(self, pruned_sbml):
-        pruned_mdict = self.model_dict.copy()
+        sbml_model = sbmlxdf.Model(self.sbml_fname)
-        pruned_mdict['reactions'] = self.model_dict['reactions'].loc[self.rids]
+        model_dict = sbml_model.to_df()
-        pruned_mdict['species'] = self.model_dict['species'].loc[self.sids]
+        pruned_mdict = model_dict.copy()
+        pruned_mdict['reactions'] = model_dict['reactions'].loc[self.rids]
+        pruned_mdict['species'] = model_dict['species'].loc[self.sids]
        # update model attributes (ids, names and modification date)
        pruned_mdict['modelAttrs']['id'] += '_pruned'

--- a/modelpruner/problems/lp_problem.py
+++ b/modelpruner/problems/lp_problem.py
@@ -258,6 +258,7 @@ class LpProblem:
            glpk.glp_scale_prob(self.lp, opt)
        # sjj = [glpk.glp_get_sjj(self.lp, 1+i) for i in range(glpk.glp_get_num_cols(self.lp))]
        # rii = [glpk.glp_get_rii(self.lp, 1+i) for i in range(glpk.glp_get_num_rows(self.lp))]
+        # TODO: glpk.glp_adv_basis(self.lp, 0)
        simplex_result = glpk.glp_simplex(self.lp, smcp)
        return self.results(simplex_result, short_result)