Downstream analysis based on Anndata object
After running desc, you get an AnnData object. Then, you may want to do some downstream analysis in R.
Extract some information in Anndata based on desc analysis.
- extract `.obs`
# `.obs` stores the metadata of each cell, such as nUMI, nGene, clusterID, etc.
df=adata.obs
- extract `.var`
#`.var` stores the metadata of each gene, such as gene name, gene symbol, etc.
df=adata.var
- extract `.obsm`
#`.obsm` stores the tSNE coordinates or other dimension reductions of each cell
df0=adata.obsm["X_tsne"]
Convert AnnData object in python to Seurat object in R
- Install necessary R packages.
# Install whichever of the required R packages are not installed yet.
# (The original vector had an empty argument between "anndata" and "Seurat",
# which makes c() fail with "argument 3 is empty".)
list.of.packages <- c("reticulate", "anndata", "Seurat")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
# Only call install.packages() when something is actually missing.
if (length(new.packages) > 0) install.packages(new.packages)
- Load Anndata
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE) #don't forget convert=FALSE
adata=ad$read_h5ad("./data/test.h5ad")
adata
- Get `.raw` data
In R, almost every gene expression matrix is stored as genes × cells (each row stands for a gene and each column stands for a cell). So we need to transpose the sparse matrix to Compressed Sparse Column format.
# Pull adata.raw.X into R and transpose it so that rows are genes and columns
# are cells. `adata` is the AnnData object read with convert = FALSE in the
# loading step; the original snippet referred to `from`, which only exists as
# a parameter inside Convert_from_anndata_to_seurat().
raw.data.matrix <- tryCatch(
  expr = t(py_to_r(adata$raw$X)),
  error = function(e) {
    # Any failure while reading/converting raw.X is reported as a missing raw.X.
    stop("No adata.raw.X in provided adata. please make sure adata has adata.raw.X when you tyr to turn it to `seurat object`")
  }
)
Some useful functions
suppressMessages(library(Seurat))# the version of Seurat <3.0
suppressMessages(library(reticulate))
# Cast the columns of adata.obs to python strings (astype("str")), skipping
# the columns named in `exclude`.
#@adata: AnnData python object (accessed through reticulate, not converted to R)
#@exclude: names of the obs columns that must be left untouched
# Returns the same adata object after the casts.
change_obs <- function(adata, exclude = c("n_genes", "n_counts")) {
  obs_columns <- adata$obs$columns
  # shape[0] is the python-side (zero-based) column count of adata.obs
  n_columns <- as.numeric(as.character(adata$obs$columns$shape[0]))
  # Walk the python index with zero-based positions 0 .. n_columns - 1.
  for (py_pos in seq_len(n_columns) - 1) {
    column_name <- as.character(obs_columns[py_pos])
    if (column_name %in% exclude) {
      next
    }
    adata$obs[[column_name]] <- adata$obs[[column_name]]$astype("str")
  }
  return(adata)
}
# Build a Seurat object from an AnnData python object accessed through
# reticulate. Uses the Seurat < 3.0 API (CreateSeuratObject(raw.data = ...),
# SetAssayData(), SetDimReduction()).
#@from: AnnData python object; X, var, obs, raw$X, raw$var and obsm are read
#@X.slot: Seurat slot that receives adata.X. "scale.data" is used as is;
#         "normalizecount.data" is stored in the "data" slot. The historical
#         misspelling "normlizecount.data" is still accepted and treated the same.
#@raw.X.slot: "count.data" uses adata.raw.X as is; "logcount.data" applies
#             expm1() to adata.raw.X before it is used as raw.data.
# Returns the Seurat object, with every entry of adata.obsm attached as a
# dimensional reduction.
Convert_from_anndata_to_seurat <- function(from = adata, X.slot = "scale.data", raw.X.slot = "logcount.data") {
  if (!py_module_available("anndata")) {
    stop("Please install the anndata python module")
  }
  # The check used to list only the misspelled "normlizecount.data", so the
  # correctly spelled value was rejected and the misspelled one was never
  # mapped to "data". Accept both spellings.
  stopifnot(X.slot %in% c("scale.data", "normalizecount.data", "normlizecount.data"))
  stopifnot(raw.X.slot %in% c("count.data", "logcount.data"))

  # adata.X, transposed so that rows are genes and columns are cells.
  data.matrix <- tryCatch(
    expr = t(py_to_r(from$X)),
    error = function(e) {
      stop("No adata.X in provided adata. If Both adata.X and adata.raw.X are None")
    }
  )
  rownames(data.matrix) <- rownames(py_to_r(from$var))
  colnames(data.matrix) <- rownames(py_to_r(from$obs))
  if (X.slot %in% c("normalizecount.data", "normlizecount.data")) {
    X.slot <- "data"
  }

  # adata.raw.X, transposed the same way; it becomes the Seurat raw.data.
  raw.data.matrix <- tryCatch(
    expr = t(py_to_r(from$raw$X)),
    error = function(e) {
      stop("No adata.raw.X in provided adata. please make sure adata have adata.raw.X when you tyr to turn it to `seurat object`")
    }
  )
  if (raw.X.slot == "logcount.data") {
    raw.data.matrix <- expm1(raw.data.matrix)
  }
  rownames(x = raw.data.matrix) <- rownames(x = py_to_r(from$raw$var))
  colnames(x = raw.data.matrix) <- rownames(x = py_to_r(from$obs))

  # get meta.data; rename any column containing nUMI / nGene by appending
  # "_ori" to that substring
  meta.data <- py_to_r(from$obs)
  if ("nUMI" %in% colnames(x = meta.data)) {
    colnames(x = meta.data) <- gsub(
      pattern = "nUMI",
      replacement = "nUMI_ori",
      x = colnames(x = meta.data)
    )
  }
  if ("nGene" %in% colnames(x = meta.data)) {
    colnames(x = meta.data) <- gsub(
      pattern = "nGene",
      replacement = "nGene_ori",
      x = colnames(x = meta.data)
    )
  }

  seurat.object <- CreateSeuratObject(raw.data = raw.data.matrix, meta.data = meta.data)
  seurat.object <- SetAssayData(
    object = seurat.object,
    assay.type = "RNA",
    slot = X.slot,
    new.data = data.matrix
  )

  # deal with obsm fields that are not dimensional reductions, or have different name structures
  x1 <- py_to_r(from$obsm$keys())
  drs <- unlist(strsplit(gsub(".{1,50}:|\\s|)", replacement = "", x = x1), split = ","))
  # Known reduction names and the column-name prefix (key) used for them.
  dr.dict <- list(tSNE_ = "tsne", PC = "pca")
  for (dr in drs) {
    dr.embed <- py_to_r(from$obsm[[dr]])
    dr.name <- sub(pattern = "X_", replacement = "", x = dr)
    if (is.na(dr.name) || dr.name == "") {
      dr.name <- dr
    }
    if (dr.name %in% dr.dict) {
      dr.key <- names(x = which(x = dr.dict == dr.name))
    } else {
      # Unknown reductions use their upper-cased name as key.
      dr.key <- toupper(x = dr.name)
    }
    colnames(x = dr.embed) <- paste0(dr.key, seq_len(ncol(x = dr.embed)))
    rownames(x = dr.embed) <- seurat.object@cell.names
    seurat.object <- SetDimReduction(
      object = seurat.object,
      reduction.type = dr.name,
      slot = "cell.embeddings",
      new.data = dr.embed
    )
    seurat.object <- SetDimReduction(
      object = seurat.object,
      reduction.type = dr.name,
      slot = "key",
      new.data = dr.key
    )
  }
  return(seurat.object)
}
# example
# Cast every obs column to string except the ones listed in `exclude`.
adata <- change_obs(adata, exclude = c("n_genes", "n_counts")) # columns to exclude
# adata.raw.X is passed as "count.data", so it is used as is (no expm1()).
obj0 <- Convert_from_anndata_to_seurat(adata, raw.X.slot = "count.data")
# Spell out FALSE: the shorthand `F` is an ordinary variable that can be reassigned.
obj0 <- NormalizeData(obj0, display.progress = FALSE)
Updating…..