Commit fb15ea69 authored by Peter Schubert

ProtectedParts exposed. Create FBA Objective based on first protected function

parent 01d4f174
  from modelpruner._version import __version__
  from modelpruner.core.model_pruner import ModelPruner
+ from modelpruner.core.protected_parts import ProtectedParts
  from . import core, models, problems

  from .model_pruner import ModelPruner
+ from .model_pruner import ProtectedParts

- __all__ = ['ModelPruner']
+ __all__ = ['ModelPruner', 'ProtectedParts']
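With the two hunks above, ProtectedParts becomes part of the package's public interface. A minimal import sketch (the workbook filename is a hypothetical placeholder):

```python
# ProtectedParts can now be imported from the package root;
# 'protected_parts.xlsx' is a made-up example file.
from modelpruner import ModelPruner, ProtectedParts

pp = ProtectedParts('protected_parts.xlsx')  # accepts a filename or a dict (see below)
```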
@@ -13,7 +13,7 @@ class ProtectedParts:
        if type(protected_parts) == str:
            if os.path.exists(protected_parts):
-               nrp = self.load_raw_protected_data(protected_parts)
+               pp_dict = self.load_raw_protected_data(protected_parts)
                print(f'Protected data loaded from {protected_parts} '
                      f'(last modified: {time.ctime(os.path.getmtime(protected_parts))})')
            else:
@@ -21,29 +21,29 @@ class ProtectedParts:
                raise FileNotFoundError
        else:
            assert type(protected_parts) == dict, 'expected filename or a dict'
-           nrp = protected_parts
+           pp_dict = protected_parts

-       self.protected_rids = set(nrp.get('reactions', []))
-       self.protected_sids = set(nrp.get('metabolites', []))
-       if 'bounds' in nrp:
+       self.protected_rids = set(pp_dict.get('reactions', []))
+       self.protected_sids = set(pp_dict.get('metabolites', []))
+       if 'bounds' in pp_dict:
            num_cols = ['fbcLb', 'fbcUb']
-           nrp['bounds'][num_cols] = nrp['bounds'][num_cols].applymap(
+           pp_dict['bounds'][num_cols] = pp_dict['bounds'][num_cols].applymap(
                lambda x: np.nan if type(x) == str else x)
            self.overwrite_bounds = {rid: [row['fbcLb'], row['fbcUb']]
-                                    for rid, row in nrp['bounds'].iterrows()}
+                                    for rid, row in pp_dict['bounds'].iterrows()}
        else:
            self.overwrite_bounds = {}

        self.protected_functions = {}
-       if 'functions' in nrp:
+       if 'functions' in pp_dict:
            num_cols = ['fbcLb', 'fbcUb', 'fraction']
-           nrp['functions'][num_cols] = nrp['functions'][num_cols].applymap(
+           pp_dict['functions'][num_cols] = pp_dict['functions'][num_cols].applymap(
                lambda x: np.nan if type(x) == str else x)
            str_cols = ['fluxObjectives', 'reaction']
-           nrp['functions'][str_cols] = nrp['functions'][str_cols].applymap(
+           pp_dict['functions'][str_cols] = pp_dict['functions'][str_cols].applymap(
                lambda x: '' if type(x) != str else x)
-           for func_id in list(nrp['functions'].index.unique()):
-               df_pf = nrp['functions'].loc[func_id]
+           for func_id in list(pp_dict['functions'].index.unique()):
+               df_pf = pp_dict['functions'].loc[func_id]
                self.protected_functions[func_id] = ProtectedFunction(df_pf)

    @staticmethod
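For illustration, a hedged sketch of the dict layout the constructor accepts, with keys and column names taken from the code above; all ids are hypothetical:

```python
import numpy as np
import pandas as pd
from modelpruner import ProtectedParts

pp_dict = {
    'reactions': ['R_ATPM'],        # reaction ids to protect (hypothetical)
    'metabolites': ['M_atp_c'],     # species ids to protect (hypothetical)
    # 'bounds': indexed by reaction id, numeric columns fbcLb/fbcUb
    'bounds': pd.DataFrame({'fbcLb': [-10.0], 'fbcUb': [10.0]},
                           index=['R_EX_glc__D_e']),
    # 'functions': indexed by function id, one row per condition of a protected function
    'functions': pd.DataFrame({'reaction': ['R_BIOMASS'], 'fluxObjectives': [''],
                               'fbcLb': [0.1], 'fbcUb': [1000.0], 'fraction': [np.nan]},
                              index=['growth']),
}
pp = ProtectedParts(pp_dict)
print(pp.protected_rids, pp.overwrite_bounds)
```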
@@ -57,15 +57,15 @@ class ProtectedParts:
        :returns: protected parts including:
        :rtype: dict
        """
-       nrp = {}
+       pp_dict = {}
        with pd.ExcelFile(fname) as xlsx:
            for key in ['reactions', 'metabolites']:
                if key in xlsx.sheet_names:
-                   nrp[key] = pd.read_excel(xlsx, sheet_name=key).iloc[:, 0].to_list()
+                   pp_dict[key] = pd.read_excel(xlsx, sheet_name=key).iloc[:, 0].to_list()
            for key in ['functions', 'bounds']:
                if key in xlsx.sheet_names:
-                   nrp[key] = pd.read_excel(xlsx, sheet_name=key, index_col=0)
+                   pp_dict[key] = pd.read_excel(xlsx, sheet_name=key, index_col=0)
-       return nrp
+       return pp_dict
class ProtectedFunction:
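The loader above expects an Excel workbook with up to four sheets of the same names. A hedged sketch of writing a compatible file (ids hypothetical, an Excel writer engine such as openpyxl assumed available):

```python
import pandas as pd

# 'reactions'/'metabolites': only the first column is read back;
# 'bounds' (and 'functions') are read back with index_col=0.
with pd.ExcelWriter('protected_parts.xlsx') as xlsx:
    pd.DataFrame({'reaction': ['R_ATPM']}).to_excel(xlsx, sheet_name='reactions', index=False)
    pd.DataFrame({'metabolite': ['M_atp_c']}).to_excel(xlsx, sheet_name='metabolites', index=False)
    pd.DataFrame({'fbcLb': [-10.0], 'fbcUb': [10.0]},
                 index=['R_EX_glc__D_e']).to_excel(xlsx, sheet_name='bounds')
```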
@@ -50,6 +50,7 @@ class FbaModel:
        # lp problem will only be created once required.
        # this gives a chance to update e.g. flux bounds, and supports pickling (multiprocessing)
        self.lp = None
+       self.scaling = 'AUTO'   # check also '2N'

    def update_reversible_rids(self):
        self.rids_rev = self.rids[self.reversible]
@@ -95,7 +96,10 @@ class FbaModel:
        fwd_rev_rids = np.hstack((self.rids, self.rids_rev))

        # identify groups of parallel reactions
-       sorted_idx = np.lexsort(fwd_rev_s_mat_coo.todense())[0]
+       # first we sort the columns of the S-matrix (sorted_idx: index into the sorted S-matrix);
+       # parallel reactions end up next to each other in sorted_idx
+       # second we check whether adjacent columns are identical, i.e. their difference (delta_coo) is the zero vector
+       sorted_idx = np.lexsort(fwd_rev_s_mat_coo.toarray())
        groups = []
        consumed_idxs = set()
        for pos in range(len(sorted_idx)):
@@ -104,8 +108,8 @@ class FbaModel:
            group = [idx]
            for adjacent_pos in range(pos + 1, len(sorted_idx)):
                candidate_idx = sorted_idx[adjacent_pos]
-               if (fwd_rev_s_mat_coo.getcol(idx) != fwd_rev_s_mat_coo.getcol(candidate_idx))\
-                       .getnnz() == 0:
+               delta_coo = fwd_rev_s_mat_coo.getcol(idx) - fwd_rev_s_mat_coo.getcol(candidate_idx)
+               if delta_coo.getnnz() == 0:
                    consumed_idxs.add(candidate_idx)
                    group.append(candidate_idx)
                else:
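The rewritten check replaces the sparse `!=` comparison with an explicit column difference whose nnz must be zero. A self-contained toy illustration of the same lexsort-then-subtract idea (not project code):

```python
import numpy as np
from scipy import sparse

# toy S-matrix: columns 0 and 2 have identical stoichiometry (parallel reactions)
s_mat = sparse.csc_matrix(np.array([[1.0, 0.0, 1.0],
                                    [-1.0, 2.0, -1.0]]))
sorted_idx = np.lexsort(s_mat.toarray())   # sort column indices; identical columns become adjacent
for a, b in zip(sorted_idx[:-1], sorted_idx[1:]):
    delta = s_mat[:, a] - s_mat[:, b]      # sparse column difference
    if delta.getnnz() == 0:                # zero vector => parallel columns
        print(f'columns {a} and {b} are parallel')   # prints: columns 0 and 2 are parallel
```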
@@ -142,7 +146,7 @@ class FbaModel:
        status.update(self.full_model_shape)
        return status

-   def export_pruned_model(self, pruned_sbml):
+   def export_pruned_model(self, pruned_sbml, df_fba_objectives):
"""Export the pruned network (or snapshot) to SBML. """Export the pruned network (or snapshot) to SBML.
Component information of initial network will be carried over, Component information of initial network will be carried over,
...@@ -153,10 +157,13 @@ class FbaModel: ...@@ -153,10 +157,13 @@ class FbaModel:
        :param pruned_sbml: pathname of pruned network, ending with '.xml'
        :type pruned_sbml: str
+       :param df_fba_objectives: new fba objective to overwrite the existing model objective
+       :type df_fba_objectives: pandas DataFrame
        """
        sbml_model = sbmlxdf.Model(self.sbml_fname)
        model_dict = sbml_model.to_df()
        pruned_mdict = model_dict.copy()
+       pruned_mdict['fbcObjectives'] = df_fba_objectives.copy()
        pruned_mdict['reactions'] = model_dict['reactions'].loc[self.rids]
        pruned_mdict['species'] = model_dict['species'].loc[self.sids]
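Callers of export_pruned_model must now supply the objectives table. A hedged usage sketch; the file names and the fba_model instance are assumptions, and the fbcObjectives column layout is whatever sbmlxdf.Model.to_df() produces:

```python
import sbmlxdf

# reuse (or edit) the objectives table of an existing SBML model
src_model = sbmlxdf.Model('original_model.xml')           # hypothetical file
df_fba_objectives = src_model.to_df()['fbcObjectives']
fba_model.export_pruned_model('pruned_model.xml', df_fba_objectives)
```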
@@ -372,7 +379,7 @@ class FbaModel:
        self.set_lp_objective(pf.objective, pf.obj_dir)
        self.update_lp_flux_bounds(overwrite_bounds)
-       res = self.lp.solve()
+       res = self.lp.solve(scaling=self.scaling)
        # print(res['success'], res['fun'])
        self.reset_lp_flux_bounds(overwrite_bounds.keys())
        self.remove_lp_objective(pf.objective)
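Because the scaling mode is now a plain instance attribute (set to 'AUTO' in __init__ above) rather than hard-wired into each solve call, it can be switched before solving. A one-line sketch with an assumed FbaModel instance:

```python
fba_model.scaling = '2N'   # alternative mode hinted at in the __init__ comment; default is 'AUTO'
```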
@@ -428,10 +435,10 @@ class FbaModel:
        flux_max = np.zeros(len(rids))
        for idx, rid in enumerate(rids):
            self.set_lp_objective({rid: 1}, 'minimize')
-           res = self.lp.solve()
+           res = self.lp.solve(scaling=self.scaling)
            flux_min[idx] = res['fun'] if res['success'] else np.nan
            self.lp.set_obj_dir('maximize')
-           res = self.lp.solve()
+           res = self.lp.solve(scaling=self.scaling)
            flux_max[idx] = res['fun'] if res['success'] else np.nan
            self.remove_lp_objective({rid: 1})
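The loop above is the standard flux-variability pattern: minimize, then maximize, each reaction's flux under fixed constraints, recording NaN on solver failure. A generic toy illustration with scipy's linprog (not this project's LP wrapper):

```python
import numpy as np
from scipy.optimize import linprog

A_eq = np.array([[1.0, -1.0]])       # toy steady state: v0 - v1 = 0
b_eq = np.array([0.0])
bounds = [(0.0, 10.0), (0.0, 5.0)]   # flux bounds for v0, v1
c = np.array([1.0, 0.0])             # objective selects v0

res_min = linprog(c, A_eq=A_eq, b_eq=b_eq, bounds=bounds)
res_max = linprog(-c, A_eq=A_eq, b_eq=b_eq, bounds=bounds)  # maximize via negated objective
v0_min = res_min.fun if res_min.success else np.nan
v0_max = -res_max.fun if res_max.success else np.nan
print(v0_min, v0_max)                # 0.0 5.0 (v0 is tied to v1, which is capped at 5)
```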