Go back to desc homepage

Data Summary

The raw gene data, including count matrix and meta.data (celltype, BatchID and ClusterID) were download from here. But here we downloaded this data using scanpy.datasets.paul15() in python software.

#python console
import scanpy.api as sc
adata=sc.datasets.paul15() #may need a few minitues to download it
adata.write("adata_paul.h5ad")  #save dataset
.libPaths(c("/home/xiaoxiang/R/x86_64-pc-linux-gnu-library/3.5",.libPaths()))# before run this Rmarkdown, we need 
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(scales))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(grid))
fig_path="/home/xiaoxiang/Documents/DESC_paper_prepare/DESC_paper_final/formal_revised/figures_sep/"
source("/media/xiaoxiang/D/DESC_reproducible_file/helpfunc_new.R")
datadirpath="/media/xiaoxiang/D/raw_data/DECv.1/Paul_data_result/"

Compared Methods

For this dataset, we included 2 methods for comparison.

Celltype full name

The Clustering Result

DESC

dataset="paul_result"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts"))# colum
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot = "count.data")
#obj=NormalizeData(obj,display.progress = F)
## set color pallate we may used
gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}
op=par(mar=c(5,4,6,4))
#color_celltype=c('#B903AA','#1CE6FF','#FF34FF','#FF4A46','#008941','#006FA6','#7A4900','#63FFAC','#B79762','#004D43','#8FB0FF','#997D87','#5A0007','#809693','#FEFFE6','#1B4400','#4FC601','#3B5DFF','#4A3B53','#FF2F80','#61615A','#BA0900','#6B7900','#00C2A0','#FFAA92','#FF90C9','#B903AA')
colors_use=gg_color_hue(10)
names(colors_use)=paste0(0:9)
image(1:length(colors_use),1, as.matrix(1:length(colors_use)),col=colors_use,ylab="",xlab="",axes=F)
axis(3,at=seq(1:length(colors_use)),labels=colors_use,las=2,lwd=0)

old=theme_set(theme_bw()+theme(strip.background = element_rect(fill="white"),
                                         panel.grid =element_blank()))
resolution_use="1.0"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$celltype=gsub(replacement = "",pattern ="^[0-9]{1,2}",x =df_desc$paul15_clusters)
df_desc$clusterID=as.numeric(gsub(replacement = "",pattern = "Baso|DC|Eos|Ery|GMP|Lymph|MEP|Mk|Mo|Neu",df_desc$paul15_clusters))
df_desc$celltype=factor(df_desc$celltype,levels=c("Ery","MEP","Mk","GMP","DC","Baso","Mo","Neu","Eos","Lymph"))
df_desc$paul15_clusters=factor(df_desc$paul15_clusters,levels=unique(as.character(df_desc$paul15_clusters))[order(as.numeric(gsub("",pattern = "[a-zA-Z]{0,5}",unique(as.character(df_desc$paul15_clusters)))))])
df_desc$Cluster=df_desc$paul15_clusters
theme_use=theme(legend.key.size = unit(1,"cm"),
                legend.key.width = unit(0,'cm'),
                legend.text = element_text(size=16),
                legend.title = element_text(size=20),
                legend.key.height = unit(0.7,"cm"),
                axis.text.x = element_blank(),
                axis.text.y = element_blank())
theme_use_feature=theme(legend.justification=0.5,
                legend.key.size = unit(1,"cm"),
                legend.text = element_text(size=20),
                legend.title = element_text(size=25),
                legend.key.height = unit(1.0,"cm"))
pdesc_1=getplot(df_desc,by.group = desc_use,ggtitle0 = "DESC-Cluster",pt.size = 1,add.label = T,jitter_value = 0.02)+
  scale_color_manual(values=colorRampPalette(colors_use)(10))+
  theme_use
pdesc_2=getplot(df_desc,by.group = "celltype",ggtitle0 = "Celltype",pt.size = 1,add.label = T,jitter_value = 0.02)+
  scale_color_manual(values=colorRampPalette(colors_use)(10))+
  theme_use
pdesc_3=getplot(df_desc,by.group ="Cluster",ggtitle0 = "Ori.Cluster",pt.size=1,add.label = T,jitter_value = 0.05)+
  scale_color_manual(values=colorRampPalette(colors_use)(19))+
  guides(colour=guide_legend(override.aes = list(alpha = 1, size=5),ncol=2))+theme_use
x=cbind(df_desc[,c(1,2)],"Max.prob"=apply(py_to_r(adata$uns[paste0("prob_matrix",resolution_use)]),1,max))
pdesc_4=getfeature.plot(x,pt.size = 1,cols.use = c("red","grey"))[[1]]+ggtitle("Max.prob")+theme_use_feature+
  theme(axis.text.x = element_blank(),axis.text.y = element_blank(),legend.title = element_blank())
#p0=egg::ggarrange(p1,p2,p3,p4,ncol=2,draw = F)

scVI

batch_id="nobatch"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata0=ad$read_h5ad(filename)
adata0=change_obs(adata0,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata0,reduction.use = "tsne")
df_scvi$celltype=gsub(replacement = "",pattern ="^[0-9]{1,2}",x =df_scvi$paul15_clusters)
df_scvi$clusterID=as.numeric(gsub(replacement = "",pattern = "Baso|DC|Eos|Ery|GMP|Lymph|MEP|Mk|Mo|Neu",df_scvi$paul15_clusters))
df_scvi$celltype=factor(df_scvi$celltype,levels=c("Ery","MEP","Mk","GMP","DC","Baso","Mo","Neu","Eos","Lymph"))
df_scvi$paul15_clusters=factor(df_scvi$paul15_clusters,levels=unique(as.character(df_scvi$paul15_clusters))[order(as.numeric(gsub("",pattern = "[a-zA-Z]{0,5}",unique(as.character(df_scvi$paul15_clusters)))))])
df_scvi$Cluster=df_scvi$paul15_clusters
pscvi_1=getplot(df_scvi,by.group = "louvain_ori1.1",ggtitle0 = "scVI-Cluster",pt.size = 1,add.label = T,jitter_value = 0.02)+
  theme_use+
  scale_color_manual(values=colorRampPalette(colors_use)(10))
pscvi_2=getplot(df_scvi,by.group = "celltype",ggtitle0 = "Celltype",pt.size = 1,add.label = T,jitter_value = 0.02)+
  theme_use+
  scale_color_manual(values=colorRampPalette(colors_use)(10))
pscvi_3=getplot(df_scvi,by.group ="Cluster",ggtitle0 = "Ori.Cluster",pt.size=1,add.label = T,jitter_value = 0.05)+
  scale_color_manual(values=colorRampPalette(colors_use)(19))+
  guides(colour=guide_legend(override.aes = list(alpha = 1, size=5),ncol=2))+theme_use
p0=egg::ggarrange(pdesc_3+theme(legend.position = "none"),
                 pscvi_3+theme(legend.title = element_blank()),
                 pdesc_2+theme(legend.position = "none"),
                 pscvi_2+theme(legend.title = element_blank()),ncol=2,draw=F,
                 labels=c("DESC","scVI","",""),
                 label.args = list(gp = grid::gpar(font = 4,col="darkblue", cex =1.8)))
p02=egg::ggarrange(pdesc_1+scale_color_manual(name="",values=colors_use),
                 pscvi_1+scale_color_manual(name="",values=colors_use),
                 labels=c("DESC","scVI","",""),
                 pdesc_4,ncol=2,draw=F,
                 label.args = list(gp = grid::gpar(font = 4,col="darkblue", cex =1.8)))
theme_use=theme(legend.key.size = unit(1.0,"cm"),
                legend.key.width = unit(0,'cm'),
                legend.text = element_text(size=16),
                legend.title = element_text(size=20),
                legend.key.height = unit(1.0,"cm"),
                panel.border = element_rect(fill=NA,color="white"),
                panel.background = element_blank()
                #axis.text.x = element_blank(),
                #axis.text.y = element_blank(),
                )

theme_use_feature=theme(legend.justification=0.5,
                legend.key.size = unit(0.5,"cm"),
                legend.text = element_text(size=16),
                legend.title = element_text(size=20),
                legend.key.height = unit(1.2,"cm"),
                #panel.border = element_rect(fill=NA,color="white"),
                panel.background = element_blank()
                #axis.text.x = element_blank(),
                #axis.text.y=element_blank()
                )
#
p3a=egg::ggarrange(pdesc_4+theme_use_feature+theme(legend.position = "right")+
                     ggtitle("DESC"),
                 pdesc_1+ggtitle("DESC cluster")+theme(legend.title = element_blank()),
                 pscvi_1+ggtitle("scVI cluster")+theme(legend.title = element_blank()),
                 ncol=3,draw=F,labels = c("a","b",""),
                       label.args = list(gp = grid::gpar(cex =3.5,color="black",face="bold")))
#p3a
p0=egg::ggarrange(pdesc_2+ggtitle("DESC")+theme(legend.title = element_blank(),legend.position = "none",plot.margin = unit(c(0,2.5,0,0),"cm")),
                 pscvi_2+ggtitle("scVI")+theme(legend.title = element_blank())+coord_cartesian(),
                 pdesc_3+ggtitle("DESC")+theme(legend.title = element_blank())+theme(legend.position = "none"),
                 pscvi_3+ggtitle("scVI")+theme(legend.title = element_blank())+coord_cartesian(),ncol=2,draw=F,
                 labels = c("c","d","e","f"),
                       label.args = list(gp = grid::gpar(cex =3.5,color="black",face="bold")))
#p0

Figure 6

## Figure 6
ggsave(file.path(fig_path,"Figure.6.tiff"),grid.arrange(p3a,p0,ncol=1,heights=c(1.1,1.8)),width = 20,height = 18,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.6.pdf"),grid.arrange(p3a,p0,ncol=1,heights=c(1.1,1.8)),width = 20,height = 18,dpi=300)
grid.arrange(p3a,p0,ncol=1,heights=c(1.1,1.8))


  1. Franziska Paul, Ya’ara Arkin, et al.(2015). Transcriptional Heterogeneity and Lineage Commitment in Myeloid Progenitors. Cell 163, 1663–1677, http://dx.doi.org/10.1016/j.cell.2015.11.013..