From fb15ea69662810c1871379a8bf1eb75f6a09cc28 Mon Sep 17 00:00:00 2001
From: Peter Schubert <Peter.Schubert@hhu.de>
Date: Thu, 20 Oct 2022 17:22:04 +0200
Subject: [PATCH] ProtectedParts exposed. Create FBA Objective based on first
 protected function

---
 modelpruner/__init__.py             |  1 +
 modelpruner/core/__init__.py        |  3 ++-
 modelpruner/core/protected_parts.py | 32 ++++++++++++++---------------
 modelpruner/models/fba_model.py     | 21 ++++++++++++-------
 4 files changed, 33 insertions(+), 24 deletions(-)
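
Note (usage sketch, not part of the applied patch): ProtectedParts is now
re-exported at the package level and can be constructed directly from a
spreadsheet or a dict; the filename below is illustrative:

    from modelpruner import ProtectedParts

    pp = ProtectedParts('protected_parts.xlsx')              # or pass a dict
    first_pf = next(iter(pp.protected_functions.values()))   # first protected function

export_pruned_model() now additionally expects a df_fba_objectives DataFrame
(sbmlxdf 'fbcObjectives' layout), which replaces the objective of the
exported model.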

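Note: for the dict-based construction path, the keys mirror the sheets read
by load_raw_protected_data(): 'reactions' and 'metabolites' map to id lists,
'bounds' and 'functions' to DataFrames indexed by id. A minimal sketch using
only 'reactions' and 'bounds' (ids and values are illustrative):

    import pandas as pd
    from modelpruner import ProtectedParts

    bounds = pd.DataFrame({'fbcLb': [-10.0], 'fbcUb': [0.0]},
                          index=['R_EX_glc__D_e'])
    pp = ProtectedParts({'reactions': ['R_ATPM'], 'bounds': bounds})
    print(pp.protected_rids)       # {'R_ATPM'}
    print(pp.overwrite_bounds)     # {'R_EX_glc__D_e': [-10.0, 0.0]}
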
diff --git a/modelpruner/__init__.py b/modelpruner/__init__.py
index e422a21..4a33acb 100644
--- a/modelpruner/__init__.py
+++ b/modelpruner/__init__.py
@@ -1,5 +1,6 @@
 from modelpruner._version import __version__
 from modelpruner.core.model_pruner import ModelPruner
+from modelpruner.core.protected_parts import ProtectedParts
 
 from . import core, models, problems
 
diff --git a/modelpruner/core/__init__.py b/modelpruner/core/__init__.py
index ca981c2..42fe4ff 100644
--- a/modelpruner/core/__init__.py
+++ b/modelpruner/core/__init__.py
@@ -1,5 +1,6 @@
 
 
 from .model_pruner import ModelPruner
+from .protected_parts import ProtectedParts
 
-__all__ = ['ModelPruner']
+__all__ = ['ModelPruner', 'ProtectedParts']
diff --git a/modelpruner/core/protected_parts.py b/modelpruner/core/protected_parts.py
index 4f2180d..d77aeae 100644
--- a/modelpruner/core/protected_parts.py
+++ b/modelpruner/core/protected_parts.py
@@ -13,7 +13,7 @@ class ProtectedParts:
 
         if type(protected_parts) == str:
             if os.path.exists(protected_parts):
-                nrp = self.load_raw_protected_data(protected_parts)
+                pp_dict = self.load_raw_protected_data(protected_parts)
                 print(f'Protected data loaded from {protected_parts} '
                       f'(last modified: {time.ctime(os.path.getmtime(protected_parts))})')
             else:
@@ -21,29 +21,29 @@ class ProtectedParts:
                 raise FileNotFoundError
         else:
             assert type(protected_parts) == dict, 'expected filename or a dict'
-            nrp = protected_parts
+            pp_dict = protected_parts
 
-        self.protected_rids = set(nrp.get('reactions', []))
-        self.protected_sids = set(nrp.get('metabolites', []))
-        if 'bounds' in nrp:
+        self.protected_rids = set(pp_dict.get('reactions', []))
+        self.protected_sids = set(pp_dict.get('metabolites', []))
+        if 'bounds' in pp_dict:
             num_cols = ['fbcLb', 'fbcUb']
-            nrp['bounds'][num_cols] = nrp['bounds'][num_cols].applymap(
+            pp_dict['bounds'][num_cols] = pp_dict['bounds'][num_cols].applymap(
                 lambda x: np.nan if type(x) == str else x)
             self.overwrite_bounds = {rid: [row['fbcLb'], row['fbcUb']]
-                                     for rid, row in nrp['bounds'].iterrows()}
+                                     for rid, row in pp_dict['bounds'].iterrows()}
         else:
             self.overwrite_bounds = {}
 
         self.protected_functions = {}
-        if 'functions' in nrp:
+        if 'functions' in pp_dict:
             num_cols = ['fbcLb', 'fbcUb', 'fraction']
-            nrp['functions'][num_cols] = nrp['functions'][num_cols].applymap(
+            pp_dict['functions'][num_cols] = pp_dict['functions'][num_cols].applymap(
                 lambda x: np.nan if type(x) == str else x)
             str_cols = ['fluxObjectives', 'reaction']
-            nrp['functions'][str_cols] = nrp['functions'][str_cols].applymap(
+            pp_dict['functions'][str_cols] = pp_dict['functions'][str_cols].applymap(
                 lambda x: '' if type(x) != str else x)
-            for func_id in list(nrp['functions'].index.unique()):
-                df_pf = nrp['functions'].loc[func_id]
+            for func_id in list(pp_dict['functions'].index.unique()):
+                df_pf = pp_dict['functions'].loc[func_id]
                 self.protected_functions[func_id] = ProtectedFunction(df_pf)
 
     @staticmethod
@@ -57,15 +57,15 @@ class ProtectedParts:
         :returns: protected parts (reactions, metabolites, functions, bounds)
         :rtype: dict
         """
-        nrp = {}
+        pp_dict = {}
         with pd.ExcelFile(fname) as xlsx:
             for key in ['reactions', 'metabolites']:
                 if key in xlsx.sheet_names:
-                    nrp[key] = pd.read_excel(xlsx, sheet_name=key).iloc[:, 0].to_list()
+                    pp_dict[key] = pd.read_excel(xlsx, sheet_name=key).iloc[:, 0].to_list()
             for key in ['functions', 'bounds']:
                 if key in xlsx.sheet_names:
-                    nrp[key] = pd.read_excel(xlsx, sheet_name=key, index_col=0)
-        return nrp
+                    pp_dict[key] = pd.read_excel(xlsx, sheet_name=key, index_col=0)
+        return pp_dict
 
 
 class ProtectedFunction:
diff --git a/modelpruner/models/fba_model.py b/modelpruner/models/fba_model.py
index 68b2437..9021ab5 100644
--- a/modelpruner/models/fba_model.py
+++ b/modelpruner/models/fba_model.py
@@ -50,6 +50,7 @@ class FbaModel:
         # lp problem will only be created once required.
         # this gives chances to update e.g. flux bounds and to support pickling (multiprocessing)
         self.lp = None
+        self.scaling = 'AUTO'   # LP scaling mode; '2N' is an alternative worth testing
 
     def update_reversible_rids(self):
         self.rids_rev = self.rids[self.reversible]
@@ -95,7 +96,10 @@ class FbaModel:
         fwd_rev_rids = np.hstack((self.rids, self.rids_rev))
 
         # identify groups of parallel reactions
-        sorted_idx = np.lexsort(fwd_rev_s_mat_coo.todense())[0]
+        # first, sort the columns of the S-matrix (sorted_idx: column order of the sorted matrix);
+        #  parallel reactions then end up next to each other in sorted_idx
+        # second, check whether adjacent columns are identical, i.e. their difference (delta_coo) is the zero vector
+        sorted_idx = np.lexsort(fwd_rev_s_mat_coo.toarray())
         groups = []
         consumed_idxs = set()
         for pos in range(len(sorted_idx)):
@@ -104,8 +108,8 @@ class FbaModel:
                 group = [idx]
                 for adjacent_pos in range(pos + 1, len(sorted_idx)):
                     candidate_idx = sorted_idx[adjacent_pos]
-                    if (fwd_rev_s_mat_coo.getcol(idx) != fwd_rev_s_mat_coo.getcol(candidate_idx))\
-                            .getnnz() == 0:
+                    delta_coo = fwd_rev_s_mat_coo.getcol(idx) - fwd_rev_s_mat_coo.getcol(candidate_idx)
+                    if delta_coo.getnnz() == 0:
                         consumed_idxs.add(candidate_idx)
                         group.append(candidate_idx)
                     else:
@@ -142,7 +146,7 @@ class FbaModel:
         status.update(self.full_model_shape)
         return status
 
-    def export_pruned_model(self, pruned_sbml):
+    def export_pruned_model(self, pruned_sbml, df_fba_objectives):
         """Export the pruned network (or snapshot) to SBML.
 
         Component information of the initial network will be carried over,
@@ -153,10 +157,13 @@ class FbaModel:
 
         :param pruned_sbml: pathname of pruned network, ending with '.xml'
         :type pruned_sbml: str
+        :param df_fba_objectives: new FBA objective to overwrite the existing model objective
+        :type df_fba_objectives: pandas DataFrame
         """
         sbml_model = sbmlxdf.Model(self.sbml_fname)
         model_dict = sbml_model.to_df()
         pruned_mdict = model_dict.copy()
+        pruned_mdict['fbcObjectives'] = df_fba_objectives.copy()
         pruned_mdict['reactions'] = model_dict['reactions'].loc[self.rids]
         pruned_mdict['species'] = model_dict['species'].loc[self.sids]
 
@@ -372,7 +379,7 @@ class FbaModel:
 
         self.set_lp_objective(pf.objective, pf.obj_dir)
         self.update_lp_flux_bounds(overwrite_bounds)
-        res = self.lp.solve()
+        res = self.lp.solve(scaling=self.scaling)
         # print(res['success'], res['fun'])
         self.reset_lp_flux_bounds(overwrite_bounds.keys())
         self.remove_lp_objective(pf.objective)
@@ -428,10 +435,10 @@ class FbaModel:
         flux_max = np.zeros(len(rids))
         for idx, rid in enumerate(rids):
             self.set_lp_objective({rid: 1}, 'minimize')
-            res = self.lp.solve()
+            res = self.lp.solve(scaling=self.scaling)
             flux_min[idx] = res['fun'] if res['success'] else np.nan
             self.lp.set_obj_dir('maximize')
-            res = self.lp.solve()
+            res = self.lp.solve(scaling=self.scaling)
             flux_max[idx] = res['fun'] if res['success'] else np.nan
             self.remove_lp_objective({rid: 1})
 
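
Note on the parallel-reaction detection above: np.lexsort orders the columns
of the stoichiometric matrix so that identical columns become adjacent, after
which only neighbouring columns in sorted_idx need to be compared. A
standalone sketch of the same idea on a toy matrix (not code from this
repository):

    import numpy as np
    from scipy import sparse

    def parallel_column_groups(s_mat):
        """Group indices of identical columns (parallel reactions)."""
        dense = s_mat.toarray()
        sorted_idx = np.lexsort(dense)       # identical columns become adjacent
        groups, consumed = [], set()
        for pos, idx in enumerate(sorted_idx):
            if idx in consumed:
                continue
            group = [int(idx)]
            for cand in sorted_idx[pos + 1:]:
                if np.array_equal(dense[:, idx], dense[:, cand]):
                    consumed.add(cand)
                    group.append(int(cand))
                else:
                    break                    # columns are sorted; the run has ended
            if len(group) > 1:
                groups.append(group)
        return groups

    s = sparse.coo_matrix(np.array([[1.0, 0.0, 1.0],
                                    [-1.0, 2.0, -1.0]]))
    print(parallel_column_groups(s))         # -> [[0, 2]]
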
-- 
GitLab