Source code for freeflux.core.mdv

'''Define the MDV class.'''


[docs] __author__ = 'Chao Wu'
import re from functools import lru_cache from collections import OrderedDict, Counter from collections.abc import Iterable from itertools import combinations_with_replacement from numbers import Real import numpy as np from scipy.special import comb from scipy.linalg import inv import warnings warnings.filterwarnings('ignore', category = RuntimeWarning)
[docs] natAbuns = {'H': [0.999885, 0.000115], 'C': [0.9893, 0.0107], 'N': [0.99632, 0.00368], 'O': [0.99757, 0.00038, 0.00205], 'Si': [0.922297, 0.046832, 0.030872], 'S': [0.9493, 0.0076, 0.0429, 0.0002]}
# ref. https://www.chem.ualberta.ca/~massspec/atomic_mass_abund.pdf
[docs] class MDV(): ''' Define MDV (i.e., mass isotopomer distribution vector) object and its operations. Convolution between mdv1 and mdv2 can be performed in three ways: 1. mdv1.conv(mdv2); 2. mdv1*mdv2; 3. mdv.conv(mdv1, mdv2). The zero element for convolution is MDV([0]), and the identity element is MDV([1]). Scalar multiplication (a*mdv) and MDV addition (mdv1 + mdv2) are also supported. In these cases, the resulting MDV are not automatically normalized. In addition to C, MDV can be built based on H, N, O, Si, and S. Parameters ---------- fractions: list or array MDV vector. nonnegative: bool Whether to keep the elements >= 0. normalize: bool Whether to normalize MDV vector to ensure the sum equals one. base_atom: str Base atom for MDV. Attributes ---------- value: array MDV vector. n_atoms: int # of atom. base_atom: str Base atom for MDV. fl (or fractional_labeling): float Fractional labeling. ''' def __init__( self, fractions, nonnegative = True, normalize = True, base_atom = 'C' ): ''' Parameters ---------- fractions: list or array MDV vector. nonnegative: bool Whether to keep the elements >= 0. normalize: bool Whether to normalize MDV vector to ensure the sum == 1. base_atom: str Base atom for MDV. '''
[docs] self.value = np.array(fractions)
if nonnegative: self.value[self.value < 0] = 0 if normalize and self.value.sum() != 0: self.value = self.value / self.value.sum()
[docs] self.base_atom = base_atom
[docs] def __iter__(self): return self.value
[docs] def __getitem__(self, key): ''' Parameters ---------- key: int or slice Index. ''' return self.value[key]
[docs] def __array__(self): return self.value
[docs] def conv(self, mdv): ''' Note. Assume that the mdv convolved has the same base atom. Parameters ---------- mdv: list or array or MDV ''' if not isinstance(mdv, self.__class__): mdv = self.__class__(mdv) mdvConv = gen_conv(self.value, mdv.value) return self.__class__(mdvConv, base_atom=self.base_atom)
[docs] def __mul__(self, other): ''' Parameters ---------- other: scalar, list, array or MDV ''' if isinstance(other, Real): return self.__class__( other*self.value, nonnegative = False, normalize = False, base_atom=self.base_atom ) elif isinstance(other, Iterable): return self.conv(other) else: return NotImplemented
[docs] def __rmul__(self, other): ''' Parameters ---------- other: MDV ''' return self * other
[docs] def __add__(self, other): ''' Parameters ---------- other: MDV ''' if isinstance(other, type(self)): return self.__class__( self.value+other.value, nonnegative = False, normalize = False, base_atom=self.base_atom ) else: return NotImplemented
[docs] def __radd__(self, other): ''' Parameters ---------- other: MDV ''' return self + other
[docs] def _correction_matrix(self, X, n_Xs): ''' Parameters ---------- X: str Which element the correction matrix will be generated for. n_Xs: int # of X atoms in metabolite or metabolite fragment, these atoms will be corrected. ''' corrMat = np.zeros((self.n_atoms+1, self.n_atoms+1)) natMDV = get_natural_MDV(n_Xs, base_atom = X) for i in range(self.n_atoms+1): for j in range(i+1): if i - j <= natMDV.n_atoms: corrMat[i, j] = natMDV.value[i-j] else: corrMat[i, j] = 0 return corrMat
[docs] def correct_for_natural_abundance(self, atom_dict): ''' Parameters ---------- atom_dict: dict element needs to be corrected => # of corresponding atoms in metabolite (fragment). ''' corrMat = np.eye(self.n_atoms+1) for X, n_Xs in atom_dict.items(): corrMatX = self._correction_matrix(X, n_Xs) corrMat = np.dot(corrMat, corrMatX) corrMDV = np.dot(inv(corrMat), self.value) return self.__class__(corrMDV, base_atom=self.base_atom)
[docs] def correct_for_inoculum(self, fraction): ''' Parameters ---------- fraction: float in [0, 1] Fraction of inoculum in biomass measured. ''' natMDV = get_natural_MDV(self.n_atoms, base_atom = self.base_atom) corrMDV = (self.value - fraction*natMDV.value)/(1 - fraction) return self.__class__(corrMDV, base_atom=self.base_atom)
@property @lru_cache()
[docs] def n_atoms(self): return self.value.size - 1
@property @lru_cache()
[docs] def fl(self): if self.n_atoms == 0: raise ValueError('no atom in MDV') else: return round((self.value*np.arange(self.n_atoms+1)).sum()/self.n_atoms, 4)
[docs] fractional_labeling = fl
[docs] def __repr__(self): return f'{self.__class__.__name__}([{", ".join(map(str, self.value.round(3)))}])'
[docs] def _isotopomer_combination(n_atoms, n_natural_isotops): ''' Parameters ---------- n_atoms: int # of atoms. n_natural_isotops: int # of natural isotopmers. Returns ------- combs2: dict Example ------- >>> _isotopomer_combination(2, 3) OrderedDict([(0, [Counter({0: 2})]), (1, [Counter({0: 1, 1: 1})]), (2, [Counter({0: 1, 2: 1}), Counter({1: 2})]), (3, [Counter({1: 1, 2: 1})]), (4, [Counter({2: 2})])]) ''' allCombos = combinations_with_replacement(range(n_natural_isotops), n_atoms) combos1 = {} for combo in allCombos: combos1.setdefault(sum(combo), []).append(combo) combos2 = OrderedDict() for mass in sorted(combos1): for combo in combos1[mass]: combos2.setdefault(mass, []).append(Counter(combo)) return combos2
[docs] def get_natural_MDV(n_atoms, base_atom = 'C'): ''' Calculate the MDV of an unlabeled fragment. Parameters ---------- n_atoms: int # of atoms. base_atom: str Base atom for MDV. ''' natAbun = natAbuns[base_atom] combos = _isotopomer_combination(n_atoms, len(natAbun)) mdv = [] for _, counters in combos.items(): ele = 0 for counter in counters: item = 1 left = n_atoms for isotop, count in counter.items(): item *= comb(left, count) * natAbun[isotop]**count left -= count ele += item mdv.append(ele) return MDV(mdv, base_atom=base_atom)
[docs] def get_substrate_MDV(atom_nos, labeling_pattern, percentage, purity, label_atom='C'): ''' Calculate the MDV of a fragment from labeled substrate. Currently this function only supports H, C or N-labeled substrate. Parameters ---------- atom_nos: list of int Atom NOs, starting from 1. labeling_pattern: str or list of str Labeling pattern of substrate, '0' for unlabeled atom, '1' for labeled atom, e.g., '100000' for 1-13C glucose. List if tracer with multiple labeling patterns are used. Natural substrate (with all '0's) don't need to be explicitly set. If str, labeling_pattern should not be natural substrate. percentage: float or list of float Molar percentage (in range of [0,1]) of corresponding tracer. Sum of percentage should be <= 1, and the rest will be considered as natural substrate. List if tracer with multiple labeling patterns are used. * If list, len(percentage) should be equal to len(labeling_pattern). * If float, labeling_pattern should not be natural substrate. purity: float or list of float Labeled atom purity (in range of [0,1]) of corresponding tracer. List if tracer with multiple labeling patterns are used. * If list, len(purity) should be equal to len(labeling_pattern). * If float, labeling_pattern should not be natural substrate. label_atom: str Labeled atom, i.e., base atom in the MDV. Currently supports only "H", "C" and "N". ''' if not isinstance(labeling_pattern, list): if re.match(r'^0+$', labeling_pattern): raise ValueError('use mdv.get_natural_MDV for natural substrate') labeling_pattern = [labeling_pattern] if not isinstance(percentage, list): percentage = [percentage] if sum(percentage) > 1: raise ValueError('sum of tracer percentage should be <= 1') if not isinstance(purity, list): purity = [purity] nAtoms = len(atom_nos) singleAtomMdv = {} for pat, pur, in zip(labeling_pattern, purity): singleAtomMdv[pat] = { '1': MDV([1-pur, pur], base_atom=label_atom), '0': get_natural_MDV(1, base_atom = label_atom) } mdvAllTracer = MDV(np.zeros(nAtoms+1), base_atom=label_atom) for pat, per, pur in zip(labeling_pattern, percentage, purity): mdvPerTracer = MDV([1], base_atom=label_atom) for atomNO in atom_nos: mdvPerTracer *= singleAtomMdv[pat][pat[atomNO-1]] mdvAllTracer += per*mdvPerTracer mdvNatural = (1 - sum(percentage))*get_natural_MDV(nAtoms, base_atom = label_atom) return mdvAllTracer + mdvNatural
[docs] def gen_conv(arr1, arr2): ''' Parameters ---------- arr1, arr2: list or array If one of the arrs is 2-D, the function performs 2-D convolution of MDV (array) and MDV derivative. ''' nAtoms1 = len(arr1) - 1 nAtoms2 = len(arr2) - 1 if nAtoms2 > nAtoms1: arr1, arr2 = arr2, arr1 nAtoms1, nAtoms2 = nAtoms2, nAtoms1 arrConv = [] for i in range(nAtoms1+nAtoms2+1): if i <= nAtoms2: arrConv.append(sum([arr1[i-j] * arr2[j] for j in range(i+1)])) elif nAtoms2 < i <= nAtoms1: arrConv.append(sum([arr1[i-j] * arr2[j] for j in range(nAtoms2+1)])) else: arrConv.append(sum([arr1[i-j] * arr2[j] for j in range(i-nAtoms1, nAtoms2+1)])) return np.array(arrConv)
[docs] def conv(mdv1, mdv2): ''' Perform convolution between two MDVs. Parameters ---------- mdv1: list or array or MDV mdv2: list or array or MDV Returns ------- mdv: MDV ''' if not isinstance(mdv1, MDV): mdv1 = MDV(mdv1) if not isinstance(mdv2, MDV): mdv2 = MDV(mdv2) return mdv1 * mdv2
[docs] def diff_conv(mdv_mdvder1, mdv_mdvder2): ''' Parameters ---------- mdv_mdvder1: 2-list of arrays namely [MDV, MDVder], MDVder in shape of (len(MDV), len(v)) mdv_mdvder2:: 2-list of arrays namely [MDV, MDVder], MDVder in shape of (len(MDV), len(v)) Returns ------- mdv: array mdvder: 2-D array In shape of (len(MDV), len(v)). ''' mdv1, mdvder1 = mdv_mdvder1 mdv2, mdvder2 = mdv_mdvder2 if isinstance(mdv1, MDV): mdv1 = mdv1.value if isinstance(mdv2, MDV): mdv2 = mdv2.value mdv = gen_conv(mdv1, mdv2) mdvder = gen_conv(mdv1, mdvder2) + gen_conv(mdv2, mdvder1) return mdv, mdvder