Downstream analysis based on Anndata
object
After running `desc`, you get an `AnnData` object. You may then want to do some downstream analysis in R.
Extract information from the `AnnData` object produced by the `desc` analysis.
- extract
.obs
#`.obs` stores the metadata of each cell, such as nUMI, nGene, clusterID, etc.
df=adata.obs
- extract
.var
#`.var` stores the metadata of each gene, such as gene name, gene symbol, etc.
df=adata.var
- extract
.obsm
#`.obsm` stores the t-SNE coordinates or other dimension reductions of each cell
df0=adata.obsm["X_tsne"]
Convert AnnData
object in python to Seurat
object in R
- Install necessary R packages.
# Install whichever of the required R packages are not installed yet.
# NOTE(review): the conversion code in this tutorial is marked as targeting
# Seurat < 3.0; install.packages() fetches whatever version the repository
# currently offers -- confirm the installed Seurat version matches.
list.of.packages <- c("reticulate", "anndata", "Seurat")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) {
  install.packages(new.packages)
}
- Load
Anndata
# Tell reticulate which Python interpreter to use; this is set before any
# Python call is made so that it takes effect.
Sys.setenv(RETICULATE_PYTHON = "/usr/bin/python3")
library(reticulate)
# don't forget convert = FALSE: the AnnData object must stay a Python object
ad <- import("anndata", convert = FALSE)
adata <- ad$read_h5ad("./data/test.h5ad")
adata
- Get `.raw` data
In R, almost every gene expression matrix is stored as genes × cells (each row stands for a gene and each column stands for a cell). So we need to transpose the sparse matrix into Compressed Sparse Column format.
# Convert adata.raw.X to an R matrix and transpose it.
# `adata` is the AnnData object loaded in the previous step.
raw.data.matrix <- tryCatch(
  expr = t(py_to_r(adata$raw$X)),
  error = function(e) {
    # any failure while reading/converting raw.X is reported as a missing raw.X
    stop("No adata.raw.X in provided adata. please make sure adata has adata.raw.X when you tyr to turn it to `seurat object`")
  }
)
Some useful functions
suppressMessages(library(Seurat))# the version of Seurat <3.0
suppressMessages(library(reticulate))
# Cast the columns of `adata$obs` to strings, leaving the excluded ones alone.
#
# @param adata   AnnData object handled through reticulate; its members are
#                indexed with 0-based (Python) positions, so it is expected to
#                have been imported with convert = FALSE.
# @param exclude names of the obs columns that must not be cast.
# @return `adata`, with every non-excluded obs column replaced by the result
#         of its astype("str") call.
change_obs <- function(adata, exclude = c("n_genes", "n_counts")) {
  obs_columns <- adata$obs$columns
  column_count <- as.numeric(as.character(adata$obs$columns$shape[0]))
  for (position in seq_len(column_count)) {
    # seq_len() counts from 1, the column index is addressed from 0
    column_name <- as.character(obs_columns[position - 1])
    if (column_name %in% exclude) {
      next
    }
    adata$obs[[column_name]] <- adata$obs[[column_name]]$astype("str")
  }
  return(adata)
}
# Build a Seurat object (Seurat < 3.0 API) from an AnnData object accessed
# through reticulate.
#
# @param from        AnnData object; `from$X`, `from$raw$X`, `from$var`,
#                    `from$raw$var`, `from$obs` and `from$obsm` are read.
# @param X.slot      what `from$X` contains: "scale.data" (stored in the
#                    "scale.data" slot) or "normalizecount.data" (stored in
#                    the "data" slot). The misspelling "normlizecount.data",
#                    which earlier validation required, is kept as an alias.
# @param raw.X.slot  what `from$raw$X` contains: "count.data" (used as is) or
#                    "logcount.data" (expm1() is applied before use).
# @return a Seurat object created from the raw matrix and `from$obs`, with
#         `from$X` stored in the requested slot and one dimensional reduction
#         per entry parsed from `from$obsm`.
# Stops with an error when the anndata Python module is unavailable, when a
# slot argument is not one of the accepted values, or when `from$X` /
# `from$raw$X` cannot be converted.
Convert_from_anndata_to_seurat <- function(from = adata, X.slot = "scale.data", raw.X.slot = "logcount.data") {
  if (!py_module_available("anndata")) {
    stop("Please install the anndata python module")
  }
  # Both spellings are accepted: the check used to list only the misspelled
  # value while the mapping below tested the correct one, so the
  # normalized-data option could never reach the "data" slot.
  normalized.aliases <- c("normalizecount.data", "normlizecount.data")
  stopifnot(X.slot %in% c("scale.data", normalized.aliases))
  stopifnot(raw.X.slot %in% c("count.data", "logcount.data"))

  # transpose after conversion; row names come from var, column names from obs
  data.matrix <- tryCatch(
    expr = t(py_to_r(from$X)),
    error = function(e) {
      stop("No adata.X in provided adata. If Both adata.X and adata.raw.X are None")
    }
  )
  rownames(data.matrix) <- rownames(py_to_r(from$var))
  colnames(data.matrix) <- rownames(py_to_r(from$obs))
  if (X.slot %in% normalized.aliases) {
    X.slot <- "data"
  }

  raw.data.matrix <- tryCatch(
    expr = t(py_to_r(from$raw$X)),
    error = function(e) {
      stop("No adata.raw.X in provided adata. please make sure adata have adata.raw.X when you tyr to turn it to `seurat object`")
    }
  )
  if (raw.X.slot == "logcount.data") {
    # undo log1p so the raw slot receives values on the original scale
    raw.data.matrix <- expm1(raw.data.matrix)
  }
  rownames(x = raw.data.matrix) <- rownames(x = py_to_r(from$raw$var))
  colnames(x = raw.data.matrix) <- rownames(x = py_to_r(from$obs))

  # get meta.data; nUMI / nGene are renamed with an "_ori" suffix
  # (presumably to avoid clashing with columns Seurat creates -- confirm)
  meta.data <- py_to_r(from$obs)
  if ("nUMI" %in% colnames(x = meta.data)) {
    colnames(x = meta.data) <- gsub(
      pattern = "nUMI",
      replacement = "nUMI_ori",
      x = colnames(x = meta.data)
    )
  }
  if ("nGene" %in% colnames(x = meta.data)) {
    colnames(x = meta.data) <- gsub(
      pattern = "nGene",
      replacement = "nGene_ori",
      x = colnames(x = meta.data)
    )
  }

  seurat.object <- CreateSeuratObject(raw.data = raw.data.matrix, meta.data = meta.data)
  seurat.object <- SetAssayData(
    object = seurat.object,
    assay.type = "RNA",
    slot = X.slot,
    new.data = data.matrix
  )

  # deal with obsm fields that are not dimensional reductions, or have different name structures
  # NOTE(review): the key names are recovered by stripping a "...:" prefix,
  # whitespace and ")" from the converted keys() value and splitting on ","
  # -- confirm this matches the anndata version in use.
  x1 <- py_to_r(from$obsm$keys())
  drs <- unlist(strsplit(gsub(".{1,50}:|\\s|)", replacement = "", x = x1), split = ","))
  # reduction name -> column-name prefix for the reductions with a fixed key
  dr.dict <- list(tSNE_ = "tsne", PC = "pca")
  for (dr in drs) {
    dr.embed <- py_to_r(from$obsm[[dr]])
    dr.name <- sub(pattern = "X_", replacement = "", x = dr)
    if (is.na(dr.name) || dr.name == "") {
      dr.name <- dr
    }
    if (dr.name %in% dr.dict) {
      dr.key <- names(x = which(x = dr.dict == dr.name))
    } else {
      dr.key <- toupper(x = dr.name)
    }
    colnames(x = dr.embed) <- paste0(dr.key, seq_len(ncol(x = dr.embed)))
    rownames(x = dr.embed) <- seurat.object@cell.names
    seurat.object <- SetDimReduction(
      object = seurat.object,
      reduction.type = dr.name,
      slot = "cell.embeddings",
      new.data = dr.embed
    )
    seurat.object <- SetDimReduction(
      object = seurat.object,
      reduction.type = dr.name,
      slot = "key",
      new.data = dr.key
    )
  }
  return(seurat.object)
}
# example
# cast the obs columns to strings, except those listed in `exclude`
adata <- change_obs(adata, exclude = c("n_genes", "n_counts")) # columns to exclude
# raw.X.slot = "count.data": adata.raw.X is used as is, without expm1()
obj0 <- Convert_from_anndata_to_seurat(adata, raw.X.slot = "count.data")
obj0 <- NormalizeData(obj0, display.progress = FALSE)
Updating…..