'''Define statistic functions.'''
from os import makedirs
import numpy as np
import pandas as pd
from scipy.stats import t, chi2, probplot, zscore
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore', category = RuntimeWarning)
[docs]
def _chi2_test(obj_value, dof, confidence_level):
'''
Parameters
----------
obj_value: float
Objective value.
dof: int
Degree of freedom.
confidence_level: float
Confidence level, e.g., 0.95 as 95% confidence level.
'''
chi2Lb = chi2.ppf((1-confidence_level)/2, dof)
chi2Ub = chi2.ppf((1+confidence_level)/2, dof)
flag = '' if chi2Lb <= obj_value <= chi2Ub else 'not '
print(
f'SSR {obj_value:.2f} {flag}acceptable,'
f' which is {flag}in [{round(chi2Lb, 2)}, {round(chi2Ub, 2)}]'
f' of the {confidence_level*100}%% confidence level by chi2 test'
)
[docs]
def _normal_probability(resids, show_fig, output_dir):
'''
Parameters
----------
resids: array
Residuals.
show_fig: bool
Whether to show figure.
output_dir: str
Output directory.
'''
probplot(resids, plot = plt)
ax = plt.gca()
for item in [ax.xaxis.label, ax.yaxis.label] + ax.get_xticklabels() + ax.get_yticklabels():
item.set_fontsize(20)
ax.set_ylabel('Weighted residuals')
ax.set_title('')
if output_dir:
makedirs(output_dir, exist_ok = True)
plt.savefig(f'{output_dir}/normal_probability_plot.jpg', dpi = 300, bbox_inches = 'tight')
if show_fig:
plt.show()
plt.close()
[docs]
def _simulated_vs_measured_measurements(
sim_meas,
exp_meas,
exp_meas_err,
xlabel,
ylabel,
xticklabels,
filename,
show_fig,
output_dir
):
'''
Parameters
----------
sim_meas: array or list
Simulated measurements.
exp_meas: array or list
Measured measurements.
exp_meas_err: array or list
Errors of measured measurements.
xlabel: str
Xlabel.
ylabel: str
Ylabel.
xticklabels: array or list
Xticklabels.
filename: str
File name.
show_fig: bool
Whether to show figure.
output_dir: str
Output directory.
'''
sim_meas = np.array(sim_meas)
exp_meas = np.array(exp_meas)
exp_meas_err = np.array(exp_meas_err)
barWidth = 0.4
xPos = np.arange(1, sim_meas.size+1)
plt.bar(
x = xPos - 0.5*barWidth,
height = sim_meas,
width = barWidth,
label = 'simulated'
)
plt.bar(
x = xPos + 0.5*barWidth,
height = exp_meas,
yerr = exp_meas_err,
width = barWidth,
label = 'measured',
capsize = 3
)
plt.xticks(ticks = xPos, labels = xticklabels, fontsize = 20)
plt.tick_params(axis = 'y', labelsize = 20)
plt.xlabel(xlabel, fontsize = 20)
plt.ylabel(ylabel, fontsize = 20)
plt.legend(
loc = 'center',
bbox_to_anchor = (1.25, 0.5),
fontsize = 20,
frameon = False
)
if output_dir:
makedirs(output_dir, exist_ok = True)
#filename = xlabel if xlabel else ylabel
plt.savefig(f'{output_dir}/{filename}.jpg', dpi = 300, bbox_inches = 'tight')
if show_fig:
plt.show()
plt.close()
[docs]
def _simulated_vs_measured_MDVs(simulated_MDVs, measured_MDVs, show_fig, output_dir):
'''
Parameters
----------
simulated_MDVs: dict
EMU ID => simulated MDV.
measured_MDVs: dict
EMU ID => [means, sds].
show_fig: bool
Whether to show figure.
output_dir: str
Output directory.
'''
for emuid in measured_MDVs:
simMDV = simulated_MDVs[emuid]
expMDV, expMDVerr = measured_MDVs[emuid]
xticklabels = [f'M{str(i)}' for i in range(simMDV.size)]
_simulated_vs_measured_measurements(
simMDV,
expMDV,
expMDVerr,
emuid,
'Isotopomer fraction',
xticklabels,
'sim_vs_exp_MDVs-'+emuid,
show_fig,
output_dir
)
[docs]
def _simulated_vs_measured_fluxes(simulated_fluxes, measured_fluxes, show_fig, output_dir):
'''
Parameters
----------
simulated_fluxes: dict
Flux ID => simulated flux.
measured_fluxes: dict
Flux ID => [mean, sd].
show_fig: bool
Whether to show figure.
output_dir: str
Output directory.
'''
simFluxes = []
expFluxes = []
expFluxesErr = []
xticklabels = []
for fluxid, simFlux in simulated_fluxes.items():
expFlux, expFluxErr = measured_fluxes[fluxid]
simFluxes.append(simFlux)
expFluxes.append(expFlux)
expFluxesErr.append(expFluxErr)
xticklabels.append(fluxid)
_simulated_vs_measured_measurements(
simFluxes,
expFluxes,
expFluxesErr,
'',
'Flux',
xticklabels,
'sim_vs_exp_fluxes',
show_fig,
output_dir
)
[docs]
def _confidence_intervals_le(res, irr, cov, dof, confidence_level):
'''
Parameters
----------
res: ser
Optimal results, e.g., net fluxes, total fluxes or concentrations.
irr: list
Irreversible items. total fluxes and concentrations are all considered
irreversible.
cov: array
Corvariance matrix of free fluxes obtained from hessian at convergence.
dof: int
Degree of freedom.
confidence_level: float
Confidence level, e.g., 0.95 as 95% confidence level.
'''
errors = t.ppf((1+confidence_level)/2, dof)/(dof+1)**0.5*np.diag(cov)**0.5
lbs = res - errors
ubs = res + errors
ranges = {}
for varid in res.index:
if varid in irr:
ranges[varid] = [max(0, lbs[varid]), ubs[varid]]
else:
ranges[varid] = [lbs[varid], ubs[varid]]
return ranges
[docs]
def _confidence_intervals_mc(res_set, irr, confidence_level):
'''
Parameters
----------
res_set: list of ser
Set of optimal net fluxes. Total fluxes or concentrations.
irr: list
Irreversible items. Total fluxes and concentrations are all considered
irreversible.
confidence_level: float
Confidence level, e.g., 0.95 as 95% confidence level.
'''
resSet = pd.DataFrame(res_set)
resSet = resSet[(np.abs(zscore(resSet)) < 3).all(axis = 1)]
quantile = resSet.quantile([(1-confidence_level)/2, (1+confidence_level)/2])
mask = resSet.apply(lambda col: col.between(*quantile[col.name]), axis = 0)
selected = resSet[mask.all(axis = 1)]
if selected.empty:
raise ValueError('can not estimate CIs, need more Monte Carlo runs')
else:
ranges = {}
for varid, col in selected.items():
if varid in irr:
ranges[varid] = [max(0, col.min()), col.max()]
else:
ranges[varid] = [col.min(), col.max()]
return ranges
[docs]
def _MDV_kinetics(emuid, simulated_inst_MDVs, show_fig, output_dir):
'''
Parameters
----------
emuid: str
EMU ID.
simulated_inst_MDVs: dict
Timepoint => MDV.
show_fig: bool
Whether to show figure.
output_dir: str
Output directory.
'''
tpoints = sorted(simulated_inst_MDVs)
ts = np.linspace(tpoints[0], tpoints[-1], 100)
mdvs = np.array(list(simulated_inst_MDVs.values()))
interpMDVs = interp1d(tpoints, mdvs, axis = 0, kind = 1)(ts)
for i in range(interpMDVs.shape[1]):
plt.plot(ts, interpMDVs[:, i], label = 'M%s' % i, linewidth = 2)
plt.xlabel('Time (s)', fontsize = 20)
plt.ylabel('EMU %s' % emuid, fontsize = 20)
plt.tick_params(labelsize = 20)
plt.legend(
loc = 'center',
bbox_to_anchor = (1.15, 0.5),
fontsize = 20,
frameon = False
)
if output_dir:
makedirs(output_dir, exist_ok = True)
plt.savefig(f'{output_dir}/{emuid}_kinetics.jpg', dpi = 300, bbox_inches = 'tight')
if show_fig:
plt.show()
plt.close()
[docs]
def _simulated_vs_measured_inst_MDVs(
simulated_inst_MDVs,
measured_inst_MDVs,
show_fig,
output_dir
):
'''
Parameters
----------
simulated_inst_MDVs: dict
EMU ID => {t => simulated MDV}.
measured_inst_MDVs: dict
EMU ID => {t => [means, sds]}.
show_fig: bool
Whether to show figure.
output_dir: str
Output directory.
'''
for emuid in measured_inst_MDVs:
simInstMDVs = simulated_inst_MDVs[emuid]
expInstMDVs = measured_inst_MDVs[emuid]
tpoints_sim = sorted(simInstMDVs)
ts = np.linspace(tpoints_sim[0], tpoints_sim[-1], 100)
mdvs = np.array(list(simInstMDVs.values()))
interpMDVs = interp1d(tpoints_sim, mdvs, axis = 0, kind = 1)(ts)
tpoint_exp = sorted(expInstMDVs)
expMDVs = np.array([expInstMDVs[t][0] for t in tpoint_exp])
expMDVerrs = np.array([expInstMDVs[t][1] for t in tpoint_exp])
colors = []
for i, simMDV in enumerate(interpMDVs.T):
p = plt.plot(ts, simMDV, label = 'M%s' % i, linewidth = 2)
colors.append(p[0].get_color())
legend = plt.legend(
loc = 'center',
bbox_to_anchor = (1.15, 0.5),
title = 'simulated',
title_fontsize = 20,
fontsize = 20,
frameon = False
)
plt.gca().add_artist(legend)
handles = []
labels = []
for i, (expMDV, expMDVerr) in enumerate(zip(expMDVs.T, expMDVerrs.T)):
e = plt.errorbar(
tpoint_exp,
expMDV,
expMDVerr,
color = colors[i],
linestyle = '',
marker = '.',
markersize = 15,
elinewidth = 2,
capsize = 5
)
handles.append(e[0])
labels.append('M%s' % i)
plt.legend(
handles = handles,
labels = labels,
loc = 'center',
bbox_to_anchor = (1.45, 0.5),
title = 'measured',
title_fontsize = 20,
fontsize = 20,
frameon = False
)
plt.xlabel('Time (s)', fontsize = 20)
plt.ylabel(f'EMU {emuid}', fontsize = 20)
plt.tick_params(labelsize = 20)
if output_dir:
makedirs(output_dir, exist_ok = True)
plt.savefig(f'{output_dir}/{emuid}_inst.jpg', dpi = 300, bbox_inches = 'tight')
if show_fig:
plt.show()
plt.close()
[docs]
def _contribution_matrix(cov, trans_mat, simulated_der, measured_cov):
'''
Parameters
----------
cov: array
Corvariance matrix of free fluxes obtained from hessian at convergence.
trans_mat: array
Transformation matrix from free flux to total flux or net flux, i.e.,
N to total flux, T@N to net flux.
simulated_der: array
Derivative of simulated measurements w.r.t. free fluxes.
measured_cov: array
Corvariance matrix of measurements.
'''
fluxesCov = trans_mat@cov@trans_mat.T
fluxes_measCov = trans_mat@cov@simulated_der.T
contribMat = np.empty_like(fluxes_measCov)
for i in range(contribMat.shape[0]):
for j in range(contribMat.shape[1]):
contribMat[i,j] = fluxes_measCov[i,j]**2/(fluxesCov[i,i]*measured_cov[j,j])
return contribMat
[docs]
def _sensitivity(cov, trans_mat, simulated_der, measured_inv_cov):
'''
Parameters
----------
cov: array
Corvariance matrix of free fluxes obtained from hessian at convergence.
trans_mat: array
Transformation matrix from free flux to total flux or net flux, i.e.,
N to total flux, T@N to net flux.
simulated_der: array
Derivative of simulated measurements w.r.t. free fluxes.
measured_inv_cov: array
Inversed corvariance matrix of measurements.
'''
return trans_mat@cov@simulated_der.T@measured_inv_cov