diff --git a/xbanalysis/problems/__init__.py b/xbanalysis/problems/__init__.py index a7326aaac46b6415ea4bd89bc8c8e210d9b8256c..a43385f072965d7770700fb82df60e18bed4dd5d 100644 --- a/xbanalysis/problems/__init__.py +++ b/xbanalysis/problems/__init__.py @@ -3,5 +3,6 @@ from .gba_problem import GbaProblem from .rba_problem import RbaProblem from .fba_problem import FbaProblem +from .gba_sflux_problem import GbaSfluxProblem -__all__ = ['GbaProblem', 'FbaProblem', 'RbaProblem'] +__all__ = ['GbaProblem', 'FbaProblem', 'RbaProblem', 'GbaSfluxProblem'] diff --git a/xbanalysis/problems/gba_sflux_problem.py b/xbanalysis/problems/gba_sflux_problem.py new file mode 100644 index 0000000000000000000000000000000000000000..e431909bc9bf62d5176d69af7ce8fd69ae7be98e --- /dev/null +++ b/xbanalysis/problems/gba_sflux_problem.py @@ -0,0 +1,199 @@ +"""Implementation of GBA scaled flux Problem class. + +Based on Hugo Dourado scaled flux GBA optimization + +Ref: Presentation +Growth Mechanics: the Economy, Control and Optimality of Self-Replication +Metabolic Pathway Analysis 2021, Hugo Dourado + +First step: convert XBA into Hugo RGBA format +Second step: implement optimization (inline with Hugo formalism) + +Peter Schubert, HHU Duesseldorf, September 2022 +""" + +import numpy as np +import pandas as pd +import re +import sympy as sp +import sbmlxdf + +from xbanalysis.model.xba_unit_def import XbaUnit +from xbanalysis.utils.utils import expand_kineticlaw + + +class GbaSfluxProblem: + + def __init__(self, xba_model): + """Initialize GbaSfluxProblem from a xba_model. + + :param xba_model: xba model with all required GBA parameters + :type xba_model: XbaModel + """ + self.xba_model = xba_model + + # while XBA model contains enzymes for all metabolic reactions with related synthesis reactions, + # in scaled flux GBA optimization one a single protein (here we use the ribosome) is used + metab_sids = [s.id for s in xba_model.species.values() if s.sboterm.is_in_branch('SBO:0000247')] + self.sids_e = [sid for sid in metab_sids if xba_model.species[sid].constant is True] + sids_s = [sid for sid in metab_sids if xba_model.species[sid].constant is False] + sid_p = [s.id for s in xba_model.species.values() if s.sboterm.is_in_branch('SBO:0000250')][0] + self.sids_sp = sids_s + [sid_p] + self.sids_esp = self.sids_e + self.sids_sp + + rids_be = [r.id for r in xba_model.reactions.values() if r.sboterm.is_in_branch('SBO:0000167')] + rid_r = xba_model.species[sid_p].ps_rid + self.rids_ber = rids_be + [rid_r] + + self.mw_esp = np.array([xba_model.species[sid].mw for sid in self.sids_esp]) + self.mw_ber = np.array([xba_model.species[xba_model.reactions[rid].enzyme].mw + for rid in self.rids_ber]) + # mw_ber is used for scaling of kcats, weight for protein synthesis needs to be rescaled + s = xba_model.species[sid_p] + if hasattr(s, 'rba_process_costs'): + p_len = s.rba_process_costs['translation'] + else: + r = xba_model.reactions[rid_r] + p_len = min(r.reactants.values()) + self.mw_ber[-1] *= p_len + + # retrieve S-matrix: internal metabos + total protein and metabolic reaction + ribosome synthesis + sm_espxber = xba_model.get_stoic_matrix(self.sids_esp, self.rids_ber) + + # calulate mass turnover in gram reactants/products per mol reactions (g/mol) + sm_espxber_react = np.where(sm_espxber > 0, 0, sm_espxber) + sm_espxber_prod = np.where(sm_espxber < 0, 0, sm_espxber) + mass_turnover_react = -sm_espxber_react.T.dot(self.mw_esp) + mass_turnover_prod = sm_espxber_prod.T.dot(self.mw_esp) + self.mass_turnover = np.max((mass_turnover_react, mass_turnover_prod), axis=0) + + # mass normalized stoichiometric matrix + self.smmn_espxber = (sm_espxber * self.mw_esp.reshape(-1, 1)) / self.mass_turnover + + self.rho = xba_model.parameters['rho'].value + self.cemn = np.array([xba_model.species[sid].initial_conc * xba_model.species[sid].mw + for sid in self.sids_e]) + self.cimn = np.array([xba_model.species[sid].initial_conc * xba_model.species[sid].mw + for sid in sids_s]) + + # identify unit ids for kinetic parameters + units_per_s = [XbaUnit({'kind': 'second', 'exp': -1.0, 'scale': 0, 'mult': 1.0})] + units_mol_per_l = [XbaUnit({'kind': 'litre', 'exp': -1.0, 'scale': 0, 'mult': 1.0}), + XbaUnit({'kind': 'mole', 'exp': 1, 'scale': 0, 'mult': 1.0})] + self.kcat_udid = self.get_unit_def_id(units_per_s) + self.km_udid = self.get_unit_def_id(units_mol_per_l) + self.kcats_global = {pid: p.value for pid, p in xba_model.parameters.items() + if p.units == self.kcat_udid} + self.kms_global = {pid: p.value for pid, p in xba_model.parameters.items() + if p.units == self.km_udid} + + def get_unit_def_id(self, query_units): + for udid, ud in self.xba_model.unit_defs.items(): + if ud.is_equivalent(query_units): + return udid + return None + + def get_kcats(self, rid): + + r = self.xba_model.reactions[rid] + kcats_local = {pid: val for pid, [val, units] in r.local_params.items() if units == self.kcat_udid} + + # analyse kinetic law to identify kcat_f and kcat_r + expanded_kl = sbmlxdf.misc.mathml2numpy(r.kinetic_law) + kl = expand_kineticlaw(expanded_kl, self.xba_model.functions) + used_kcats = {kcat_id: val for kcat_id, val in (self.kcats_global | kcats_local).items() + if re.search(r'\b' + kcat_id + r'\b', kl)} + kcat_f = 0.0 + kcat_r = 0.0 + for kcat_id, val in used_kcats.items(): + # replace other parameters with value of 1.0 (exclude selected kcat and numbers) + kl_kcat = re.sub(r'\b(?!(' + kcat_id + r'\b))[a-zA-Z_]\w*\b', str(1.0), kl) + # set two different kcat values to check which increases/reduces the rate + kl_kcat_v2 = re.sub(r'\b' + kcat_id + r'\b', str(2.0), kl_kcat) + kl_kcat_v10 = re.sub(r'\b' + kcat_id + r'\b', str(10.0), kl_kcat) + if sp.sympify(kl_kcat_v2) > sp.sympify(kl_kcat_v10): + kcat_r = val + else: + kcat_f = val + return kcat_f, kcat_r + + def get_kms(self, rid): + """ + + :param rid: reaction id + :type rid: string + :return: + :rtype: + """ + + r = self.xba_model.reactions[rid] + kms_local = {pid: val for pid, [val, units] in r.local_params.items() if units == self.km_udid} + + # analyse kinetic law to identify Km and Ki values + expanded_kl = sbmlxdf.misc.mathml2numpy(r.kinetic_law) + kl = expand_kineticlaw(expanded_kl, self.xba_model.functions) + used_kms = {km_id: val for km_id, val in (self.kms_global | kms_local).items() + if re.search(r'\b' + km_id + r'\b', kl)} + kms = {} + kis = {} + # check correlation between metabolite and KM value, scaling both equally should not change value of kl + for km_id, val in used_kms.items(): + for sid in r.reactants: + # in first step: replace parameters with value of 1.0 (excluding selected km and sid) + # km_id, sid are excluded from matching, e.g. \b(?!(' + km_id + r'\b)) + # parameter names starting with letter or underscore [a-zA-Z_] are matched + # numbers are not matched, as they do not start with [a-zA-Z_] + # in second phase: protected km_id and sid are replaced by values + kl_km_m = re.sub(r'\b(?!(' + km_id + r'\b))(?!(' + sid + r'\b))[a-zA-Z_]\w*\b', str(1.0), kl) + kl_km_m_v2 = re.sub(r'\b[a-zA-Z_]\w*\b', str(2.0), kl_km_m) + kl_km_m_v10 = re.sub(r'\b[a-zA-Z_]\w*\b', str(10.0), kl_km_m) + if bool(abs(sp.sympify(kl_km_m_v2) - sp.sympify(kl_km_m_v10)) <= 1e-8): + kms[sid] = val + for sid in r.products: + kl_km_m = re.sub(r'\b(?!(' + km_id + r'\b))(?!(' + sid + r'\b))[a-zA-Z_]\w*\b', str(1.0), kl) + kl_km_m_v2 = re.sub(r'\b[a-zA-Z_]\w*\b', str(2.0), kl_km_m) + kl_km_m_v10 = re.sub(r'\b[a-zA-Z_]\w*\b', str(10.0), kl_km_m) + if abs(sp.sympify(kl_km_m_v2) - sp.sympify(kl_km_m_v10)) <= 1e-8: + kis[sid] = val + return kms, kis + + def mass_normalized_model(self): + """Retrieve mass normalized model + + :return: gba model mass normalized as per Hugo Dourado + :rtype: dict of pandas DataFrames + """ + protein_mn = self.rho - sum(self.cimn) + + kcats = np.array([self.get_kcats(rid) for rid in self.rids_ber]).T + skcats = kcats * 3600.0 * self.mass_turnover / self.mw_ber + + km = np.zeros_like(self.smmn_espxber) + ki = np.zeros_like(self.smmn_espxber) + rid2idx = {mrid: idx for idx, mrid in enumerate(self.rids_ber)} + sid2idx = {msid: idx for idx, msid in enumerate(self.sids_esp)} + for rid in self.rids_ber: + kms, kis = self.get_kms(rid) + for sid, val in kms.items(): + km[sid2idx[sid], rid2idx[rid]] = val + for sid, val in kis.items(): + ki[sid2idx[sid], rid2idx[rid]] = val + # scale Michaelis constants with molecluar weights + skm = km * self.mw_esp.reshape(-1, 1) + ski = ki * self.mw_esp.reshape(-1, 1) + ska = np.zeros_like(self.smmn_espxber) + + mn_model = { + 'N': pd.DataFrame(self.smmn_espxber, index=self.sids_esp, columns=self.rids_ber), + 'KM': pd.DataFrame(skm, index=self.sids_esp, columns=self.rids_ber), + 'KI': pd.DataFrame(ski, index=self.sids_esp, columns=self.rids_ber), + 'KA': pd.DataFrame(ska, index=self.sids_esp, columns=self.rids_ber), + 'kcat': pd.DataFrame(skcats, index=['kcat_f', 'kcat_b'], columns=self.rids_ber), + 'conditions': pd.DataFrame(np.hstack((self.rho, self.cemn)), + index=['rho'] + self.sids_e, columns=[1]), + 'lower_c': pd.DataFrame(np.zeros(len(self.sids_sp)), + index=self.sids_sp, columns=['lower']), + 'initial': pd.DataFrame(np.hstack((self.cimn, protein_mn)), + index=self.sids_sp, columns=['initial']), + } + return mn_model