Source code for pertpy.tools._differential_gene_expression._statsmodels
import numpy as np
import pandas as pd
import scanpy as sc
import statsmodels
import statsmodels.api as sm
from tqdm.auto import tqdm
from ._base import LinearModelBase
from ._checks import check_is_numeric_matrix
[docs]
class Statsmodels(LinearModelBase):
    """Differential expression test using a statsmodels linear regression.

    One regression model is fitted per entry of ``self.adata.var_names`` (see :meth:`fit`);
    contrasts are then evaluated with a t-test on each fitted model.
    """

    def _check_counts(self):
        """Validate ``self.data`` via ``check_is_numeric_matrix``."""
        check_is_numeric_matrix(self.data)

    def fit(
        self,
        regression_model: type[sm.OLS] | type[sm.GLM] = sm.OLS,
        **kwargs,
    ) -> None:
        """Fit the specified regression model, once per variable.

        The fitted results are stored in ``self.models`` in the same order as
        ``self.adata.var_names``. Any previously stored models are discarded.

        Args:
            regression_model: A statsmodels regression model class, either OLS or GLM.
            **kwargs: Additional arguments forwarded to the ``regression_model`` constructor
                (not to its ``fit()`` method, which is called without arguments).
                In particular, this is where you can specify the family for GLM.

        Examples:
            >>> import numpy as np
            >>> import statsmodels.api as sm
            >>> import pertpy as pt
            >>> model = pt.tl.Statsmodels(adata, design="~condition")
            >>> model.fit(sm.GLM, family=sm.families.NegativeBinomial(link=sm.families.links.Log()))
            >>> results = model.test_contrasts(np.array([0, 1]))
        """
        self.models = []
        for var in tqdm(self.adata.var_names):
            # Response: the values of this variable taken from ``self.layer``.
            # ``self.design`` is passed as the second positional argument of the model.
            mod = regression_model(
                sc.get.obs_df(self.adata, keys=[var], layer=self.layer)[var],
                self.design,
                **kwargs,
            )
            self.models.append(mod.fit())

    def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame:
        """Run a t-test for one contrast on every fitted model.

        Args:
            contrast: Contrast specification handed unchanged to each model's ``t_test``.
            **kwargs: Accepted for interface compatibility; not used by this implementation.

        Returns:
            A data frame with one row per variable and the columns ``variable``, ``p_value``,
            ``t_value``, ``sd``, ``log_fc`` (the contrast's estimated effect) and
            ``adj_p_value`` (output of statsmodels' ``fdrcorrection`` with default settings),
            sorted by ascending ``p_value``.
        """
        # Import the submodule explicitly instead of relying on
        # ``statsmodels.stats.multitest`` having been loaded as a side effect of another import.
        from statsmodels.stats.multitest import fdrcorrection

        res = []
        for var, mod in zip(tqdm(self.adata.var_names), self.models, strict=False):
            t_test = mod.t_test(contrast)
            res.append(
                {
                    "variable": var,
                    # Reduce to a plain Python scalar like the other statistics so the column
                    # is numeric rather than holding array objects (assumes a single-row
                    # contrast, as the ``.item()`` calls below already do).
                    "p_value": np.asarray(t_test.pvalue).item(),
                    "t_value": t_test.tvalue.item(),
                    "sd": t_test.sd.item(),
                    "log_fc": t_test.effect.item(),
                }
            )
        return (
            pd.DataFrame(res)
            .sort_values("p_value")
            # fdrcorrection returns (rejected, corrected p-values); keep the corrected values.
            .assign(adj_p_value=lambda x: fdrcorrection(x["p_value"])[1])
        )