Source code for pertpy.metadata._moa

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
from scanpy import settings

from pertpy.data._dataloader import _download

from ._look_up import LookUp
from ._metadata import MetaData

if TYPE_CHECKING:
    from anndata import AnnData


class Moa(MetaData):
    """Utilities to fetch metadata for mechanism of action studies."""

    def __init__(self):
        # Filled lazily by `_download_clue` the first time metadata is needed.
        self.clue = None

    def _download_clue(self) -> None:
        """Load the clue.io drug repurposing table into `self.clue`.

        The file is fetched into the scanpy cache directory unless it is already present there.
        Only the `pert_iname`, `moa` and `target` columns are kept.
        """
        clue_path = Path(settings.cachedir) / "repurposing_drugs_20200324.txt"
        if not clue_path.exists():
            _download(
                url="https://s3.amazonaws.com/data.clue.io/repurposing/downloads/repurposing_drugs_20200324.txt",
                output_file_name="repurposing_drugs_20200324.txt",
                output_path=settings.cachedir,
                block_size=4096,
                is_zip=False,
            )
        # The repurposing table is tab-delimited; its values (drug names, MoAs) contain spaces,
        # so it must not be split on whitespace. The first 9 lines are skipped before the header row.
        self.clue = pd.read_csv(clue_path, sep="\t", skiprows=9)
        self.clue = self.clue[["pert_iname", "moa", "target"]]

    def annotate(
        self,
        adata: AnnData,
        query_id: str = "perturbation",
        target: str | None = None,
        verbosity: int | str = 5,
        copy: bool = False,
    ) -> AnnData:
        """Annotate cells affected by perturbations by mechanism of action.

        For each cell, we fetch the mechanism of action and molecular targets of the compounds sourced from clue.io.
        Perturbagen names are matched case-insensitively against the `pert_iname` column of the metadata.

        Args:
            adata: The data object to annotate.
            query_id: The column of `.obs` with the name of a perturbagen. Defaults to 'perturbation'.
            target: The column of `.obs` with target information. If set to None, all MoAs are retrieved without
                comparing molecular targets. Defaults to None.
            verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
                Defaults to 5.
            copy: Determines whether a copy of the `adata` is returned. Defaults to False.

        Returns:
            Returns an AnnData object with MoA annotation.

        Raises:
            ValueError: If `query_id` is not a column of `adata.obs`.
        """
        if copy:
            adata = adata.copy()

        if query_id not in adata.obs.columns:
            raise ValueError(f"The requested query_id {query_id} is not in `adata.obs`.\n" "Please check again.")

        if self.clue is None:
            self._download_clue()

        identifier_num_all = len(adata.obs[query_id].unique())
        not_matched_identifiers = list(set(adata.obs[query_id].str.lower()) - set(self.clue["pert_iname"].str.lower()))
        self._warn_unmatch(
            total_identifiers=identifier_num_all,
            unmatched_identifiers=not_matched_identifiers,
            query_id=query_id,
            reference_id="pert_iname",
            metadata_type="moa",
            verbosity=verbosity,
        )

        # Merging on lower-cased key arrays (instead of column names) makes pandas add a helper
        # column named "key_0"; it is dropped again and the original obs index is restored.
        adata.obs = (
            adata.obs.merge(
                self.clue,
                left_on=adata.obs[query_id].str.lower(),
                right_on=self.clue["pert_iname"].str.lower(),
                how="left",
                suffixes=("", "_fromMeta"),
            )
            .set_index(adata.obs.index)
            .drop("key_0", axis=1)
        )

        # If target column is given, check whether it is one of the targets listed in the metadata
        # If inconsistent, treat this perturbagen as unmatched and overwrite the annotated metadata with NaN
        if target is not None:
            # When the obs column is itself called "target", the metadata column received the merge suffix.
            target_meta = "target" if target != "target" else "target_fromMeta"
            # NOTE(review): this is a plain substring test on the stringified values, not an exact
            # comparison against individual target entries - confirm that this is intended.
            adata.obs[target_meta] = adata.obs[target_meta].mask(
                ~adata.obs.apply(lambda row: str(row[target]) in str(row[target_meta]), axis=1)
            )
            pertname_meta = "pert_iname" if query_id != "pert_iname" else "pert_iname_fromMeta"
            adata.obs.loc[adata.obs[target_meta].isna(), [pertname_meta, "moa"]] = np.nan

        # If query_id and reference_id have different names, there will be a column for each of them after merging
        # which is redundant as they refer to the same information.
        if query_id != "pert_iname":
            del adata.obs["pert_iname"]

        return adata

    def lookup(self) -> LookUp:
        """Generate LookUp object for Moa metadata.

        The LookUp object provides an overview of the metadata to annotate.
        annotate_moa function has a corresponding lookup function in the LookUp object,
        where users can search the query_ids and targets in the metadata.
        The clue.io table is loaded first if it has not been loaded yet.

        Returns:
            Returns a LookUp object specific for MoA annotation.
        """
        if self.clue is None:
            self._download_clue()

        return LookUp(
            type="moa",
            transfer_metadata=[self.clue],
        )