Data Summary
- The data can be download in the Gene Expression Omnibus under the accession number
GSE728571
The raw gene data, including count matrix and meta.data (celltype, BatchID and ClusterID) were download from here. But here we downloaded this data using scanpy.datasets.paul15() in python software.
#python console
import scanpy.api as sc
adata=sc.datasets.paul15() #may need a few minitues to download it
adata.write("adata_paul.h5ad") #save dataset.libPaths(c("/home/xiaoxiang/R/x86_64-pc-linux-gnu-library/3.5",.libPaths()))# before run this Rmarkdown, we need
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(scales))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(grid))
fig_path="/home/xiaoxiang/Documents/DESC_paper_prepare/DESC_paper_final/formal_revised/figures_sep/"
source("/media/xiaoxiang/D/DESC_reproducible_file/helpfunc_new.R")
datadirpath="/media/xiaoxiang/D/raw_data/DECv.1/Paul_data_result/"Compared Methods
For this dataset, we included 2 methods for comparison.
DESC: our method, Deep learning enables accurate clustering and batch effect removal in single-cell RNA-seq analysis, we used the python package we developed desc.
scVI: Deep generative modeling for single-cell transcriptomics., we will implement it using python module
scviwith version0.3.0
Celltype full name
- Ery: erythrocyte,
- MEP: megakaryocyte/erythrocyte progenitors (their direct progenies)
- Mk: megakaryocyte
- GMP: granulocyte/macrophage progenitors
- DC: dendritic cells
- Baso: basophil progenitors
- Mo: monocyte
- Neu: neutrophil progenitors
- Eos: eosinophil
- Lymph: lymphoid progenitors (NK)
The Clustering Result
DESC
dataset="paul_result"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts"))# colum
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot = "count.data")
#obj=NormalizeData(obj,display.progress = F)## set color pallate we may used
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
op=par(mar=c(5,4,6,4))
#color_celltype=c('#B903AA','#1CE6FF','#FF34FF','#FF4A46','#008941','#006FA6','#7A4900','#63FFAC','#B79762','#004D43','#8FB0FF','#997D87','#5A0007','#809693','#FEFFE6','#1B4400','#4FC601','#3B5DFF','#4A3B53','#FF2F80','#61615A','#BA0900','#6B7900','#00C2A0','#FFAA92','#FF90C9','#B903AA')
colors_use=gg_color_hue(10)
names(colors_use)=paste0(0:9)
image(1:length(colors_use),1, as.matrix(1:length(colors_use)),col=colors_use,ylab="",xlab="",axes=F)
axis(3,at=seq(1:length(colors_use)),labels=colors_use,las=2,lwd=0)old=theme_set(theme_bw()+theme(strip.background = element_rect(fill="white"),
panel.grid =element_blank()))resolution_use="1.0"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$celltype=gsub(replacement = "",pattern ="^[0-9]{1,2}",x =df_desc$paul15_clusters)
df_desc$clusterID=as.numeric(gsub(replacement = "",pattern = "Baso|DC|Eos|Ery|GMP|Lymph|MEP|Mk|Mo|Neu",df_desc$paul15_clusters))
df_desc$celltype=factor(df_desc$celltype,levels=c("Ery","MEP","Mk","GMP","DC","Baso","Mo","Neu","Eos","Lymph"))
df_desc$paul15_clusters=factor(df_desc$paul15_clusters,levels=unique(as.character(df_desc$paul15_clusters))[order(as.numeric(gsub("",pattern = "[a-zA-Z]{0,5}",unique(as.character(df_desc$paul15_clusters)))))])
df_desc$Cluster=df_desc$paul15_clusterstheme_use=theme(legend.key.size = unit(1,"cm"),
legend.key.width = unit(0,'cm'),
legend.text = element_text(size=16),
legend.title = element_text(size=20),
legend.key.height = unit(0.7,"cm"),
axis.text.x = element_blank(),
axis.text.y = element_blank())
theme_use_feature=theme(legend.justification=0.5,
legend.key.size = unit(1,"cm"),
legend.text = element_text(size=20),
legend.title = element_text(size=25),
legend.key.height = unit(1.0,"cm"))
pdesc_1=getplot(df_desc,by.group = desc_use,ggtitle0 = "DESC-Cluster",pt.size = 1,add.label = T,jitter_value = 0.02)+
scale_color_manual(values=colorRampPalette(colors_use)(10))+
theme_use
pdesc_2=getplot(df_desc,by.group = "celltype",ggtitle0 = "Celltype",pt.size = 1,add.label = T,jitter_value = 0.02)+
scale_color_manual(values=colorRampPalette(colors_use)(10))+
theme_use
pdesc_3=getplot(df_desc,by.group ="Cluster",ggtitle0 = "Ori.Cluster",pt.size=1,add.label = T,jitter_value = 0.05)+
scale_color_manual(values=colorRampPalette(colors_use)(19))+
guides(colour=guide_legend(override.aes = list(alpha = 1, size=5),ncol=2))+theme_use
x=cbind(df_desc[,c(1,2)],"Max.prob"=apply(py_to_r(adata$uns[paste0("prob_matrix",resolution_use)]),1,max))
pdesc_4=getfeature.plot(x,pt.size = 1,cols.use = c("red","grey"))[[1]]+ggtitle("Max.prob")+theme_use_feature+
theme(axis.text.x = element_blank(),axis.text.y = element_blank(),legend.title = element_blank())
#p0=egg::ggarrange(p1,p2,p3,p4,ncol=2,draw = F)scVI
batch_id="nobatch"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata0=ad$read_h5ad(filename)
adata0=change_obs(adata0,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata0,reduction.use = "tsne")
df_scvi$celltype=gsub(replacement = "",pattern ="^[0-9]{1,2}",x =df_scvi$paul15_clusters)
df_scvi$clusterID=as.numeric(gsub(replacement = "",pattern = "Baso|DC|Eos|Ery|GMP|Lymph|MEP|Mk|Mo|Neu",df_scvi$paul15_clusters))
df_scvi$celltype=factor(df_scvi$celltype,levels=c("Ery","MEP","Mk","GMP","DC","Baso","Mo","Neu","Eos","Lymph"))
df_scvi$paul15_clusters=factor(df_scvi$paul15_clusters,levels=unique(as.character(df_scvi$paul15_clusters))[order(as.numeric(gsub("",pattern = "[a-zA-Z]{0,5}",unique(as.character(df_scvi$paul15_clusters)))))])
df_scvi$Cluster=df_scvi$paul15_clusterspscvi_1=getplot(df_scvi,by.group = "louvain_ori1.1",ggtitle0 = "scVI-Cluster",pt.size = 1,add.label = T,jitter_value = 0.02)+
theme_use+
scale_color_manual(values=colorRampPalette(colors_use)(10))
pscvi_2=getplot(df_scvi,by.group = "celltype",ggtitle0 = "Celltype",pt.size = 1,add.label = T,jitter_value = 0.02)+
theme_use+
scale_color_manual(values=colorRampPalette(colors_use)(10))
pscvi_3=getplot(df_scvi,by.group ="Cluster",ggtitle0 = "Ori.Cluster",pt.size=1,add.label = T,jitter_value = 0.05)+
scale_color_manual(values=colorRampPalette(colors_use)(19))+
guides(colour=guide_legend(override.aes = list(alpha = 1, size=5),ncol=2))+theme_usep0=egg::ggarrange(pdesc_3+theme(legend.position = "none"),
pscvi_3+theme(legend.title = element_blank()),
pdesc_2+theme(legend.position = "none"),
pscvi_2+theme(legend.title = element_blank()),ncol=2,draw=F,
labels=c("DESC","scVI","",""),
label.args = list(gp = grid::gpar(font = 4,col="darkblue", cex =1.8)))
p02=egg::ggarrange(pdesc_1+scale_color_manual(name="",values=colors_use),
pscvi_1+scale_color_manual(name="",values=colors_use),
labels=c("DESC","scVI","",""),
pdesc_4,ncol=2,draw=F,
label.args = list(gp = grid::gpar(font = 4,col="darkblue", cex =1.8)))theme_use=theme(legend.key.size = unit(1.0,"cm"),
legend.key.width = unit(0,'cm'),
legend.text = element_text(size=16),
legend.title = element_text(size=20),
legend.key.height = unit(1.0,"cm"),
panel.border = element_rect(fill=NA,color="white"),
panel.background = element_blank()
#axis.text.x = element_blank(),
#axis.text.y = element_blank(),
)
theme_use_feature=theme(legend.justification=0.5,
legend.key.size = unit(0.5,"cm"),
legend.text = element_text(size=16),
legend.title = element_text(size=20),
legend.key.height = unit(1.2,"cm"),
#panel.border = element_rect(fill=NA,color="white"),
panel.background = element_blank()
#axis.text.x = element_blank(),
#axis.text.y=element_blank()
)#
p3a=egg::ggarrange(pdesc_4+theme_use_feature+theme(legend.position = "right")+
ggtitle("DESC"),
pdesc_1+ggtitle("DESC cluster")+theme(legend.title = element_blank()),
pscvi_1+ggtitle("scVI cluster")+theme(legend.title = element_blank()),
ncol=3,draw=F,labels = c("a","b",""),
label.args = list(gp = grid::gpar(cex =3.5,color="black",face="bold")))
#p3ap0=egg::ggarrange(pdesc_2+ggtitle("DESC")+theme(legend.title = element_blank(),legend.position = "none",plot.margin = unit(c(0,2.5,0,0),"cm")),
pscvi_2+ggtitle("scVI")+theme(legend.title = element_blank())+coord_cartesian(),
pdesc_3+ggtitle("DESC")+theme(legend.title = element_blank())+theme(legend.position = "none"),
pscvi_3+ggtitle("scVI")+theme(legend.title = element_blank())+coord_cartesian(),ncol=2,draw=F,
labels = c("c","d","e","f"),
label.args = list(gp = grid::gpar(cex =3.5,color="black",face="bold")))
#p0Figure 6
## Figure 6
ggsave(file.path(fig_path,"Figure.6.tiff"),grid.arrange(p3a,p0,ncol=1,heights=c(1.1,1.8)),width = 20,height = 18,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.6.pdf"),grid.arrange(p3a,p0,ncol=1,heights=c(1.1,1.8)),width = 20,height = 18,dpi=300)
grid.arrange(p3a,p0,ncol=1,heights=c(1.1,1.8))Franziska Paul, Ya’ara Arkin, et al.(2015). Transcriptional Heterogeneity and Lineage Commitment in Myeloid Progenitors. Cell 163, 1663–1677, http://dx.doi.org/10.1016/j.cell.2015.11.013..↩