Source code for freeflux.analysis.stats

'''Define statistic functions.'''



[docs]
__author__ = 'Chao Wu'



from os import makedirs
import numpy as np
import pandas as pd
from scipy.stats import t, chi2, probplot, zscore
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore', category = RuntimeWarning)



[docs]
def _chi2_test(obj_value, dof, confidence_level):
    '''
    Parameters
    ----------
    obj_value: float
        Objective value.
    dof: int
        Degree of freedom.
    confidence_level: float
        Confidence level, e.g., 0.95 as 95% confidence level.
    '''
    
    chi2Lb = chi2.ppf((1-confidence_level)/2, dof)
    chi2Ub = chi2.ppf((1+confidence_level)/2, dof)
    
    flag = '' if chi2Lb <= obj_value <= chi2Ub else 'not '
    
    print(
        f'SSR {obj_value:.2f} {flag}acceptable,'
        f' which is {flag}in [{round(chi2Lb, 2)}, {round(chi2Ub, 2)}]'
        f' of the {confidence_level*100}%% confidence level by chi2 test'
    )




[docs]
def _normal_probability(resids, show_fig, output_dir):
    '''
    Parameters
    ----------
    resids: array
        Residuals.
    show_fig: bool
        Whether to show figure.
    output_dir: str
        Output directory.
    '''
    
    probplot(resids, plot = plt)
    ax = plt.gca()
    for item in [ax.xaxis.label, ax.yaxis.label] + ax.get_xticklabels() + ax.get_yticklabels():
        item.set_fontsize(20)
    ax.set_ylabel('Weighted residuals')
    ax.set_title('')
    
    if output_dir:
        makedirs(output_dir, exist_ok = True)
        plt.savefig(f'{output_dir}/normal_probability_plot.jpg', dpi = 300, bbox_inches = 'tight')
    
    if show_fig:
        plt.show()
    
    plt.close()




[docs]
def _simulated_vs_measured_measurements(
        sim_meas, 
        exp_meas, 
        exp_meas_err, 
        xlabel, 
        ylabel, 
        xticklabels, 
        filename, 
        show_fig, 
        output_dir
):
    '''
    Parameters
    ----------
    sim_meas: array or list
        Simulated measurements.
    exp_meas: array or list
        Measured measurements.
    exp_meas_err: array or list
        Errors of measured measurements.
    xlabel: str
        Xlabel.
    ylabel: str
        Ylabel.
    xticklabels: array or list
        Xticklabels.
    filename: str
        File name.    
    show_fig: bool
        Whether to show figure.
    output_dir: str
        Output directory.
    '''
    
    sim_meas = np.array(sim_meas)
    exp_meas = np.array(exp_meas)
    exp_meas_err = np.array(exp_meas_err)
    
    barWidth = 0.4
    xPos = np.arange(1, sim_meas.size+1)
    
    plt.bar(
        x = xPos - 0.5*barWidth, 
        height = sim_meas, 
        width = barWidth, 
        label = 'simulated'
    )
    plt.bar(
        x = xPos + 0.5*barWidth, 
        height = exp_meas, 
        yerr = exp_meas_err, 
        width = barWidth, 
        label = 'measured', 
        capsize = 3
    )
    plt.xticks(ticks = xPos, labels = xticklabels, fontsize = 20)
    plt.tick_params(axis = 'y', labelsize = 20)
    plt.xlabel(xlabel, fontsize = 20)
    plt.ylabel(ylabel, fontsize = 20)
    
    plt.legend(
        loc = 'center', 
        bbox_to_anchor = (1.25, 0.5), 
        fontsize = 20, 
        frameon = False
    )
    
    if output_dir:
        makedirs(output_dir, exist_ok = True)
        #filename = xlabel if xlabel else ylabel
        plt.savefig(f'{output_dir}/{filename}.jpg', dpi = 300, bbox_inches = 'tight')
    
    if show_fig:
        plt.show()
    
    plt.close()

    
    

[docs]
def _simulated_vs_measured_MDVs(simulated_MDVs, measured_MDVs, show_fig, output_dir):
    '''
    Parameters
    ----------
    simulated_MDVs: dict
        EMU ID => simulated MDV.
    measured_MDVs: dict
        EMU ID => [means, sds].
    show_fig: bool
        Whether to show figure.
    output_dir: str
        Output directory.    
    '''
    
    for emuid in measured_MDVs:
        simMDV = simulated_MDVs[emuid]
        expMDV, expMDVerr = measured_MDVs[emuid]
        xticklabels = [f'M{str(i)}' for i in range(simMDV.size)]
        _simulated_vs_measured_measurements(
            simMDV, 
            expMDV, 
            expMDVerr, 
            emuid, 
            'Isotopomer fraction', 
            xticklabels, 
            'sim_vs_exp_MDVs-'+emuid, 
            show_fig, 
            output_dir
        )

        
    

[docs]
def _simulated_vs_measured_fluxes(simulated_fluxes, measured_fluxes, show_fig, output_dir):
    '''
    Parameters
    ----------
    simulated_fluxes: dict
        Flux ID => simulated flux.
    measured_fluxes: dict
        Flux ID => [mean, sd].
    show_fig: bool
        Whether to show figure.
    output_dir: str
        Output directory.
    '''
    
    simFluxes = []
    expFluxes = []
    expFluxesErr = []
    xticklabels = []
    for fluxid, simFlux in simulated_fluxes.items():
        expFlux, expFluxErr = measured_fluxes[fluxid]
        simFluxes.append(simFlux)
        expFluxes.append(expFlux)
        expFluxesErr.append(expFluxErr)
        xticklabels.append(fluxid)
    
    _simulated_vs_measured_measurements(
        simFluxes, 
        expFluxes, 
        expFluxesErr, 
        '', 
        'Flux', 
        xticklabels, 
        'sim_vs_exp_fluxes', 
        show_fig, 
        output_dir
    )




[docs]
def _confidence_intervals_le(res, irr, cov, dof, confidence_level):
    '''
    Parameters
    ----------
    res: ser
        Optimal results, e.g., net fluxes, total fluxes or concentrations.
    irr: list
        Irreversible items. total fluxes and concentrations are all considered
        irreversible.
    cov: array
        Corvariance matrix of free fluxes obtained from hessian at convergence.
    dof: int
        Degree of freedom.
    confidence_level: float
        Confidence level, e.g., 0.95 as 95% confidence level.
    '''
    
    errors = t.ppf((1+confidence_level)/2, dof)/(dof+1)**0.5*np.diag(cov)**0.5
    lbs = res - errors
    ubs = res + errors
    
    ranges = {}
    for varid in res.index:
        if varid in irr:
            ranges[varid] = [max(0, lbs[varid]), ubs[varid]]
        else:
            ranges[varid] = [lbs[varid], ubs[varid]]
   
    return ranges

    


[docs]
def _confidence_intervals_mc(res_set, irr, confidence_level):
    '''
    Parameters
    ----------
    res_set: list of ser
        Set of optimal net fluxes. Total fluxes or concentrations.
    irr: list
        Irreversible items. Total fluxes and concentrations are all considered
        irreversible.
    confidence_level: float
        Confidence level, e.g., 0.95 as 95% confidence level.
    '''
    
    resSet = pd.DataFrame(res_set)
    resSet = resSet[(np.abs(zscore(resSet)) < 3).all(axis = 1)]
    
    quantile = resSet.quantile([(1-confidence_level)/2, (1+confidence_level)/2])
    mask = resSet.apply(lambda col: col.between(*quantile[col.name]), axis = 0)
    selected = resSet[mask.all(axis = 1)]

    if selected.empty:
        raise ValueError('can not estimate CIs, need more Monte Carlo runs')
    else:
        ranges = {}
        for varid, col in selected.items():
            if varid in irr:
                ranges[varid] = [max(0, col.min()), col.max()]
            else:
                ranges[varid] = [col.min(), col.max()]
      
        return ranges




[docs]
def _MDV_kinetics(emuid, simulated_inst_MDVs, show_fig, output_dir):
    '''
    Parameters
    ----------
    emuid: str
        EMU ID.
    simulated_inst_MDVs: dict
        Timepoint => MDV.
    show_fig: bool
        Whether to show figure.
    output_dir: str
        Output directory.
    '''
    
    tpoints = sorted(simulated_inst_MDVs)
    ts = np.linspace(tpoints[0], tpoints[-1], 100)
    mdvs = np.array(list(simulated_inst_MDVs.values()))
    interpMDVs = interp1d(tpoints, mdvs, axis = 0, kind = 1)(ts)
    
    for i in range(interpMDVs.shape[1]):
        plt.plot(ts, interpMDVs[:, i], label = 'M%s' % i, linewidth = 2)
        
    plt.xlabel('Time (s)', fontsize = 20)
    plt.ylabel('EMU %s' % emuid, fontsize = 20)
    plt.tick_params(labelsize = 20)
    plt.legend(
        loc = 'center', 
        bbox_to_anchor = (1.15, 0.5), 
        fontsize = 20, 
        frameon = False
    )
    
    if output_dir:
        makedirs(output_dir, exist_ok = True)
        plt.savefig(f'{output_dir}/{emuid}_kinetics.jpg', dpi = 300, bbox_inches = 'tight')
    
    if show_fig:
        plt.show()
    
    plt.close()

    
    

[docs]
def _simulated_vs_measured_inst_MDVs(
        simulated_inst_MDVs, 
        measured_inst_MDVs, 
        show_fig, 
        output_dir
):
    '''
    Parameters
    ----------
    simulated_inst_MDVs: dict
        EMU ID => {t => simulated MDV}.
    measured_inst_MDVs: dict
        EMU ID => {t => [means, sds]}.
    show_fig: bool
        Whether to show figure.
    output_dir: str
        Output directory.
    '''
    
    for emuid in measured_inst_MDVs:
        simInstMDVs = simulated_inst_MDVs[emuid]
        expInstMDVs = measured_inst_MDVs[emuid]
    
        tpoints_sim = sorted(simInstMDVs)
        ts = np.linspace(tpoints_sim[0], tpoints_sim[-1], 100)
        mdvs = np.array(list(simInstMDVs.values()))
        interpMDVs = interp1d(tpoints_sim, mdvs, axis = 0, kind = 1)(ts)
        
        tpoint_exp = sorted(expInstMDVs)
        expMDVs = np.array([expInstMDVs[t][0] for t in tpoint_exp])
        expMDVerrs = np.array([expInstMDVs[t][1] for t in tpoint_exp])
        
        colors = []
        for i, simMDV in enumerate(interpMDVs.T):
            p = plt.plot(ts, simMDV, label = 'M%s' % i, linewidth = 2)
            colors.append(p[0].get_color())
        legend = plt.legend(
            loc = 'center', 
            bbox_to_anchor = (1.15, 0.5), 
            title = 'simulated', 
            title_fontsize = 20, 
            fontsize = 20, 
            frameon = False
        )
        plt.gca().add_artist(legend)    
        
        handles = []
        labels = []
        for i, (expMDV, expMDVerr) in enumerate(zip(expMDVs.T, expMDVerrs.T)):
            e = plt.errorbar(
                tpoint_exp, 
                expMDV, 
                expMDVerr, 
                color = colors[i], 
                linestyle = '',
                marker = '.', 
                markersize = 15, 
                elinewidth = 2, 
                capsize = 5
            )
            handles.append(e[0])
            labels.append('M%s' % i)
        plt.legend(
            handles = handles, 
            labels = labels, 
            loc = 'center', 
            bbox_to_anchor = (1.45, 0.5), 
            title = 'measured', 
            title_fontsize = 20, 
            fontsize = 20, 
            frameon = False
        )
            
        plt.xlabel('Time (s)', fontsize = 20)
        plt.ylabel(f'EMU {emuid}', fontsize = 20)
        plt.tick_params(labelsize = 20)
        
        if output_dir:
            makedirs(output_dir, exist_ok = True)
            plt.savefig(f'{output_dir}/{emuid}_inst.jpg', dpi = 300, bbox_inches = 'tight')
        
        if show_fig:
            plt.show()
        
        plt.close()

            


[docs]
def _contribution_matrix(cov, trans_mat, simulated_der, measured_cov):
    '''
    Parameters
    ----------
    cov: array
        Corvariance matrix of free fluxes obtained from hessian at convergence.
    trans_mat: array
        Transformation matrix from free flux to total flux or net flux, i.e.,
        N to total flux, T@N to net flux.
    simulated_der: array
        Derivative of simulated measurements w.r.t. free fluxes.
    measured_cov: array
        Corvariance matrix of measurements.
    '''

    fluxesCov = trans_mat@cov@trans_mat.T
    fluxes_measCov = trans_mat@cov@simulated_der.T

    contribMat = np.empty_like(fluxes_measCov)
    for i in range(contribMat.shape[0]):
        for j in range(contribMat.shape[1]):
            contribMat[i,j] = fluxes_measCov[i,j]**2/(fluxesCov[i,i]*measured_cov[j,j])

    return contribMat




[docs]
def _sensitivity(cov, trans_mat, simulated_der, measured_inv_cov):
    '''
    Parameters
    ----------
    cov: array
        Corvariance matrix of free fluxes obtained from hessian at convergence.
    trans_mat: array
        Transformation matrix from free flux to total flux or net flux, i.e.,
        N to total flux, T@N to net flux.
    simulated_der: array
        Derivative of simulated measurements w.r.t. free fluxes.
    measured_inv_cov: array
        Inversed corvariance matrix of measurements.
    '''

    return trans_mat@cov@simulated_der.T@measured_inv_cov
Source code for freeflux.analysis.stats

freeflux

Navigation

Related Topics