Source code for pertpy.tools._differential_gene_expression._edger
from collections.abc import Sequence
import numpy as np
import pandas as pd
from lamin_utils import logger
from scipy.sparse import issparse
from ._base import LinearModelBase
from ._checks import check_is_integer_matrix
[docs]
class EdgeR(LinearModelBase):
"""Differential expression test using EdgeR."""
def _check_counts(self):
check_is_integer_matrix(self.data)
[docs]
def fit(self, **kwargs): # adata, design, mask, layer
"""Fit model using edgeR.
Note: this creates its own AnnData object for downstream.
Args:
**kwargs: Keyword arguments specific to glmQLFit()
"""
try:
from rpy2 import robjects as ro
from rpy2.robjects import numpy2ri, pandas2ri
from rpy2.robjects.conversion import get_conversion, localconverter
from rpy2.robjects.packages import importr
except ImportError:
raise ImportError("edger requires rpy2 to be installed.") from None
try:
edger = importr("edgeR")
except ImportError as e:
raise ImportError(
"edgeR requires a valid R installation with the following packages:\nedgeR, BiocParallel, RhpcBLASctl"
) from e
# Convert dataframe
with localconverter(get_conversion() + numpy2ri.converter):
expr = self.adata.X if self.layer is None else self.adata.layers[self.layer]
expr = expr.T.toarray() if issparse(expr) else expr.T
with localconverter(get_conversion() + pandas2ri.converter) as cv:
expr_r = cv.py2rpy(pd.DataFrame(expr, index=self.adata.var_names, columns=self.adata.obs_names))
samples_r = cv.py2rpy(self.adata.obs)
dge = edger.DGEList(counts=expr_r, samples=samples_r)
logger.info("Calculating NormFactors")
dge = edger.calcNormFactors(dge)
with localconverter(get_conversion() + numpy2ri.converter) as cv:
design_r = cv.py2rpy(self.design.values)
logger.info("Estimating Dispersions")
dge = edger.estimateDisp(dge, design=design_r)
logger.info("Fitting linear model")
fit = edger.glmQLFit(dge, design=design_r, **kwargs)
ro.globalenv["fit"] = fit
self.fit = fit
def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> pd.DataFrame: # noqa: D417
"""Conduct test for each contrast and return a data frame.
Args:
contrast: numpy array of integars indicating contrast i.e. [-1, 0, 1, 0, 0]
"""
## -- Check installations
# For running in notebook
# pandas2ri.activate()
# rpy2.robjects.numpy2ri.activate()
# ToDo:
# parse **kwargs to R function
# Fix mask for .fit()
try:
from rpy2 import robjects as ro
from rpy2.robjects import numpy2ri, pandas2ri
from rpy2.robjects.conversion import get_conversion, localconverter
from rpy2.robjects.packages import importr
except ImportError:
raise ImportError("edger requires rpy2 to be installed.") from None
try:
importr("edgeR")
except ImportError:
raise ImportError(
"edgeR requires a valid R installation with the following packages: edgeR, BiocParallel, RhpcBLASctl"
) from None
# Convert vector to R, which drops a category like `self.design_matrix` to use the intercept for the left out.
with localconverter(get_conversion() + numpy2ri.converter) as cv:
contrast_vec_r = cv.py2rpy(np.asarray(contrast))
ro.globalenv["contrast_vec"] = contrast_vec_r
# Test contrast with R
ro.r(
"""
test = edgeR::glmQLFTest(fit, contrast=contrast_vec)
de_res = edgeR::topTags(test, n=Inf, adjust.method="BH", sort.by="PValue")$table
"""
)
# Retrieve the `de_res` object
de_res = ro.globalenv["de_res"]
# If already a Pandas DataFrame, return it directly
if isinstance(de_res, pd.DataFrame):
de_res.index.name = "variable"
return de_res.reset_index().rename(columns={"PValue": "p_value", "logFC": "log_fc", "FDR": "adj_p_value"})
# Convert to Pandas DataFrame if still an R object
with localconverter(get_conversion() + pandas2ri.converter) as cv:
de_res = cv.rpy2py(de_res)
de_res.index.name = "variable"
de_res = de_res.reset_index()
return de_res.rename(columns={"PValue": "p_value", "logFC": "log_fc", "FDR": "adj_p_value"})