Data Summary
We used the data generated by the following paper Molecular Classification and Comparative Taxonomics of Foveal and Peripheral Cells in Primate Retina
. The original paper has 165679
cells, which includ 42020
retinal ganglion cells (RGCs), 36268
non-neuronal (NN) , 30302
bipolar cells (BC), 30236
amacrine cells (AC), 24707 photoreceports (PR) and 2146
horizontal cells(HC), but here we only focus on 30302
BC cells.
- Paper: Yi-Rong Peng, Karthik Shekhar, et al. Molecular Classification and Comparative Taxonomics of Foveal and Peripheral Cells in Primate Retina, Cell, 2019,Volume 176, Issue 5, 1222-1237.
- datatype: UMI
- Download link: GSE118480. This SuperSeries is composed of the following SubSeries:
- The BC cells have
30302
cells, including 12 main clusters.'IMB', 'FMB', 'RB', 'DB5\*', 'DB4', 'DB3b', 'DB2', 'BB/GB\*','DB1', 'DB6', 'DB3a', 'OFFx'
- This dataset has three different batch indexes, which include
macaque
,region
andsample
. To remove batch effect, we will usemacaque
,region
andsample
, respectively to assessent the performance of each method.
options(warn = -1)
.libPaths(c("/home/xiaoxiang/R/x86_64-pc-linux-gnu-library/3.5",.libPaths()))
knitr::opts_chunk$set(echo=T,include = T)
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(png))
suppressPackageStartupMessages(library(RANN))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(vioplot))
suppressPackageStartupMessages(library(RColorBrewer))
datadirpath="/media/xiaoxiang/D/raw_data/DECv.1/monkey_result"
fig_path="/home/xiaoxiang/Documents/DESC_paper_prepare/DESC_paper_final/formal_revised/figures_sep/"
# load necessary func
source("/media/xiaoxiang/D/DESC_reproducible_file/helpfunc_new.R")
old=theme_set(theme_bw()+theme(strip.background = element_rect(fill="white"),
panel.background = element_blank(),
panel.grid =element_blank()))
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
op=par(mar=c(5,4,6,4))
color_celltype=c('#1CE6FF','#FF34FF','#FF4A46','#008941','#A30059','#FF2F80','#0000A6','#63FFAC','#004D43','#8FB0FF','#4FC601','#3B5DFF','#4A3B53','#61615A','#BA0900','#6B7900','#00C2A0','#FFAA92','#FF90C9','#B903AA')
image(1:length(color_celltype),1, as.matrix(1:length(color_celltype)),col=color_celltype,ylab="",xlab="",axes=F)
axis(3,at=seq(1:length(color_celltype)),labels=color_celltype,las=2,lwd=0)
op=par(mar=c(5,4,6,4))
col.set.update <- c("#c10023", "#008e17", "#fb8500", "#f60000", "#FE0092", "#bc9000","#4ffc00", "#00bcac", "#0099cc",
"#D35400", "#00eefd", "#cf6bd6", "#99cc00", "#aa00ff", "#ff00ff", "#0053c8",
"#f2a287","#ffb3ff", "#800000", "#77a7b7", "#00896e", "#00cc99", "#007CC8")
image(1:length(col.set.update),1, as.matrix(1:length(col.set.update)),col=col.set.update,ylab="",xlab="",axes=F)
axis(3,at=seq(1:length(col.set.update)),labels=col.set.update,las=2,lwd=0)
The Clustering Result
DESC
For our method DESC, we tried four different situations.
- scaling across all cells,
- scaling by
macaque_id
- scaling by
sample
- scaling by
region
df0=list()
df0$desc=list()
df0$cca=list()
df0$cca3.0=list()
df0$mnn=list()
df0$scVI=list()
df0$saucie=list()
df0$BERMUDA=list()
df0$scanorama=list()
KL_list=list()
KL_list$desc=list()
KL_list$cca=list()
KL_list$cca3.0=list()
KL_list$mnn=list()
KL_list$scvi=list()
KL_list$saucie=list()
KL_list$BERMUDA0.90=list()
KL_list$BERMUDA0.85=list()
KL_list$scanorama=list()
#if (file.exists("df0.RData")) load("df0.RData")
DESC–scaling across all cells
resolution_use="0.1"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
dataset="desc_results_monkeyRetina"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$sample=NULL
df_desc$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$desc$df_desc[["all"]]=df_desc
df0$desc$dr_desc[["all"]]=py_to_r(adata$obsm[[obsm_use]]) #just for KL
if (! "all" %in% names(KL_list$desc)){
tmp=data.frame(macaque_id=BatchKL(df_desc,df0$desc$dr_desc[["all"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df_desc,df0$desc$dr_desc[["all"]],n_cells=100,batch="sample"),
region=BatchKL(df_desc,df0$desc$dr_desc[["all"]],n_cells=100,batch="region"))
KL_list$desc[["all"]]=tmp
KL_list$desc[['all']]$Method="DESC"
KL_list$desc[['all']]$Group="All"
}
df_desc[,desc_use]=factor(as.numeric(as.character(df_desc[,desc_use])))
ari=mclust::adjustedRandIndex(df_desc[,'cluster'],df_desc[,desc_use])
print(ari)
pdesc_1=getplot(df_desc,by.group = desc_use,ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)
pdesc_2=getplot(df_desc,by.group = "cluster",ggtitle0 = "Celltype",pt.size = 0.2)
pdesc_3=getplot(df_desc,by.group = "macaque_id",ggtitle0 = "macaqueID",pt.size = 0.2)
pdesc_4=getplot(df_desc,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pdesc_5=getplot(df_desc,by.group = "sample",ggtitle0 = "sampleID",pt.size = 0.2)
#p0=egg::ggarrange(pdesc_1,pdesc_2,pdesc_5,pdesc_4,pdesc_3,pdesc_KL,ncol=3,draw = F)
Note
dr
stands for that we compute KL divergence using the output of the embedded layer (32 dimension);tsne
stands for that we compute KL divergence using the tsne coordinate(2 dimension);
DESC–scaling by group macaque_id
resolution_use="0.1"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
dataset="desc_results_monkeyRetina_macaque_id"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$sample=NULL
df_desc$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$desc$df_desc[["macaque_id"]]=df_desc
df0$desc$dr_desc[["macaque_id"]]=py_to_r(adata$obsm[[obsm_use]]) #just for KL
if (! "macaque_id" %in% names(KL_list$desc)){
tmp=data.frame(macaque_id=BatchKL(df_desc,df0$desc$dr_desc[["macaque_id"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df_desc,df0$desc$dr_desc[["macaque_id"]],n_cells=100,batch="sample"),
region=BatchKL(df_desc,df0$desc$dr_desc[["macaque_id"]],n_cells=100,batch="region"))
KL_list$desc[["macaque_id"]]=tmp
KL_list$desc[['macaque_id']]$Method="DESC"
KL_list$desc[['macaque_id']]$Group="macaque_id"
}
df_desc[,desc_use]=factor(as.numeric(as.character(df_desc[,desc_use])))
ari=mclust::adjustedRandIndex(df_desc[,'cluster'],df_desc[,desc_use])
print(ari)
pdesc_m_1=getplot(df_desc,by.group = desc_use,ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pdesc_m_2=getplot(df_desc,by.group = "cluster",ggtitle0 = "Celltype",pt.size = 0.2)
pdesc_m_3=getplot(df_desc,by.group = "macaque_id",ggtitle0 = "macaqueID",pt.size = 0.2)
pdesc_m_4=getplot(df_desc,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pdesc_m_5=getplot(df_desc,by.group = "sample",ggtitle0 = "sampleID",pt.size = 0.2)
#p0=egg::ggarrange(pdesc_m_1,pdesc_m_2,pdesc_m_5,pdesc_m_4,pdesc_m_3,p_KL,ncol=3,draw = F)
DESC–scaling by group sample
resolution_use="0.3"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
dataset="desc_results_monkeyRetina_sample/"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# column
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$sample=NULL
df_desc$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$desc$df_desc[["sample"]]=df_desc
df0$desc$dr_desc[["sample"]]=py_to_r(adata$obsm[[obsm_use]]) #just for KL
if (! "sample" %in% names(KL_list$desc)){
tmp=data.frame(macaque_id=BatchKL(df_desc,df0$desc$dr_desc[["sample"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df_desc,df0$desc$dr_desc[["sample"]],n_cells=100,batch="sample"),
region=BatchKL(df_desc,df0$desc$dr_desc[["sample"]],n_cells=100,batch="region"))
KL_list$desc[["sample"]]=tmp
KL_list$desc[['sample']]$Method="DESC"
KL_list$desc[['sample']]$Group="sample"
}
df_desc[,desc_use]=factor(as.numeric(as.character(df_desc[,desc_use])))
ari=mclust::adjustedRandIndex(df_desc[,'cluster'],df_desc[,desc_use])
print(ari)
pdesc_s_1=getplot(df_desc,by.group = desc_use,ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pdesc_s_2=getplot(df_desc,by.group = "cluster",ggtitle0 = "Celltype",pt.size = 0.2)
pdesc_s_3=getplot(df_desc,by.group = "macaque_id",ggtitle0 = "macaqueID",pt.size = 0.2)
pdesc_s_4=getplot(df_desc,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pdesc_s_5=getplot(df_desc,by.group = "sample",ggtitle0 = "sampleID",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
DESC–scaling by group region
resolution_use="0.1"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
dataset="desc_results_monkeyRetina_region/"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# column
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$sample=NULL
df_desc$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$desc$df_desc[["region"]]=df_desc
df0$desc$dr_desc[["region"]]=py_to_r(adata$obsm[[obsm_use]]) #just for KL
if (! "region" %in% names(KL_list$desc)){
tmp=data.frame(macaque_id=BatchKL(df_desc,df0$desc$dr_desc[["region"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df_desc,df0$desc$dr_desc[["region"]],n_cells=100,batch="sample"),
region=BatchKL(df_desc,df0$desc$dr_desc[["region"]],n_cells=100,batch="region"))
KL_list$desc[["region"]]=tmp
KL_list$desc[['region']]$Method="DESC"
KL_list$desc[['region']]$Group="region"
}
df_desc[,desc_use]=factor(as.numeric(as.character(df_desc[,desc_use])))
ari=mclust::adjustedRandIndex(df_desc[,'cluster'],df_desc[,desc_use])
print(ari)
pdesc_r_1=getplot(df_desc,by.group = desc_use,ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pdesc_r_2=getplot(df_desc,by.group = "cluster",ggtitle0 = "Celltype",pt.size = 0.2)
pdesc_r_3=getplot(df_desc,by.group = "macaque_id",ggtitle0 = "macaqueID",pt.size = 0.2)
pdesc_r_4=getplot(df_desc,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pdesc_r_5=getplot(df_desc,by.group = "sample",ggtitle0 = "sampleID",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
CCA3.0
macaque_id
, top 2000
batch_id="macaque_id"
detach("package:Seurat",unload = T)
library("Seurat",lib.loc = "/usr/lib/R/self_library/")
filename=paste0(datadirpath,"/cca3.0_result/","obj_cca3.0_",batch_id,"_2000.rds")
obj=readRDS(filename)
df_cca3.0=cbind(obj[["tsne"]]@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca3.0$df_cca3.0[["macaque_id"]]=df_cca3.0
df0$cca3.0$dr_cca3.0[["macaque_id"]]=obj[["pca"]]@cell.embeddings[,1:20]
detach("package:Seurat",unload = T)
library("Seurat")
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
#getplot(df_cca3.0,by.group = "sample",pt.size = 0.1)
cca3.0_use="macaque_idtop2000"
if (! "macaque_id" %in% names(KL_list$cca3.0)){
tmp=data.frame(
macaque_id=BatchKL(df0$cca3.0$df_cca3.0[["macaque_id"]],df0$cca3.0$dr_cca3.0[["macaque_id"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$cca3.0$df_cca3.0[["macaque_id"]],df0$cca3.0$dr_cca3.0[["macaque_id"]],n_cells=100,batch="sample"),
region=BatchKL(df0$cca3.0$df_cca3.0[["macaque_id"]],df0$cca3.0$dr_cca3.0[["macaque_id"]],n_cells=100,batch="region"))
KL_list$cca3.0[["macaque_id"]]=tmp
KL_list$cca3.0[['macaque_id']]$Method="Seurat3.0"
KL_list$cca3.0[['macaque_id']]$Group="macaque_id"
}
df_cca3.0[,"integrated_snn_res.0.1"]=factor(as.numeric(as.character(df_cca3.0[,"integrated_snn_res.0.1"])))
df_cca3.0[,"res"]=df_cca3.0[,"integrated_snn_res.0.1"]
df0$cca3.0$df_cca3.0[["macaque_id"]]$res=df_cca3.0$res
ari=mclust::adjustedRandIndex(df_cca3.0[,'cluster'],df_cca3.0[,"res"])
pcca3.0_m_1=getplot(df_cca3.0,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca3.0_m_2=getplot(df_cca3.0,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca3.0_m_3=getplot(df_cca3.0,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca3.0_m_4=getplot(df_cca3.0,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca3.0_m_5=getplot(df_cca3.0,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
sampleID
, top 2000
batch_id="sample"
detach("package:Seurat",unload = T)
library("Seurat",lib.loc = "/usr/lib/R/self_library/")
filename=paste0(datadirpath,"/cca3.0_result/","obj_cca3.0_",batch_id,"_2000.rds")
obj=readRDS(filename)
df_cca3.0=cbind(obj[["tsne"]]@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca3.0$df_cca3.0[[batch_id]]=df_cca3.0
df0$cca3.0$dr_cca3.0[[batch_id]]=obj[["pca"]]@cell.embeddings[,1:20]
detach("package:Seurat",unload = T)
library("Seurat")
cca3.0_use="sampletop2000"
if (! "sample" %in% names(KL_list$cca3.0)){
tmp=data.frame(
macaque_id=BatchKL(df0$cca3.0$df_cca3.0[["sample"]],df0$cca3.0$dr_cca3.0[["sample"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$cca3.0$df_cca3.0[["sample"]],df0$cca3.0$dr_cca3.0[["sample"]],n_cells=100,batch="sample"),
region=BatchKL(df0$cca3.0$df_cca3.0[["sample"]],df0$cca3.0$dr_cca3.0[["sample"]],n_cells=100,batch="region"))
KL_list$cca3.0[["sample"]]=tmp
KL_list$cca3.0[['sample']]$Method="Seurat3.0"
KL_list$cca3.0[['sample']]$Group="sample"
}
df_cca3.0[,"integrated_snn_res.0.1"]=factor(as.numeric(as.character(df_cca3.0[,"integrated_snn_res.0.1"])))
df_cca3.0[,"res"]=df_cca3.0[,"integrated_snn_res.0.1"]
df0$cca3.0$df_cca3.0[["macaque_id"]]$res=df_cca3.0$res
ari=mclust::adjustedRandIndex(df_cca3.0[,'cluster'],df_cca3.0[,"res"])
pcca3.0_s_1=getplot(df_cca3.0,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca3.0_s_2=getplot(df_cca3.0,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca3.0_s_3=getplot(df_cca3.0,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca3.0_s_4=getplot(df_cca3.0,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca3.0_s_5=getplot(df_cca3.0,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
region
, top 2000
batch_id="region"
detach("package:Seurat",unload = T)
library("Seurat",lib.loc = "/usr/lib/R/self_library/")
filename=paste0(datadirpath,"/cca3.0_result/","obj_cca3.0_",batch_id,"_2000.rds")
obj=readRDS(filename)
df_cca3.0=cbind(obj[["tsne"]]@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca3.0$df_cca3.0[["region"]]=df_cca3.0
df0$cca3.0$dr_cca3.0[["region"]]=obj[["pca"]]@cell.embeddings[,1:20]
detach("package:Seurat",unload = T)
library("Seurat")
cca3.0_use="regiontop2000"
if (! "region" %in% names(KL_list$cca3.0)){
tmp=data.frame(
macaque_id=BatchKL(df0$cca3.0$df_cca3.0[["region"]],df0$cca3.0$dr_cca3.0[["region"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$cca3.0$df_cca3.0[["region"]],df0$cca3.0$dr_cca3.0[["region"]],n_cells=100,batch="sample"),
region=BatchKL(df0$cca3.0$df_cca3.0[["region"]],df0$cca3.0$dr_cca3.0[["region"]],n_cells=100,batch="region"))
KL_list$cca3.0[["region"]]=tmp
KL_list$cca3.0[['region']]$Method="Seurat3.0"
KL_list$cca3.0[['region']]$Group="region"
}
df_cca3.0[,"integrated_snn_res.0.01"]=factor(as.numeric(as.character(df_cca3.0[,"integrated_snn_res.0.01"])))
df_cca3.0[,"res"]=df_cca3.0[,"integrated_snn_res.0.01"]
df0$cca3.0$df_cca3.0[["macaque_id"]]$res=df_cca3.0$res
ari=mclust::adjustedRandIndex(df_cca3.0[,'cluster'],df_cca3.0[,"res"])
pcca3.0_r_1=getplot(df_cca3.0,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca3.0_r_2=getplot(df_cca3.0,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca3.0_r_3=getplot(df_cca3.0,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca3.0_r_4=getplot(df_cca3.0,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca3.0_r_5=getplot(df_cca3.0,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
Note The minumum resolution I used is 0.2, so the number of clusters is larger than the number of true celltype.
CCA
macaque_id
, top 2000
batch_id="macaque_id"
filename=paste0(datadirpath,"/cca_result/","obj_cca_",batch_id,"_2000_False.rds")
obj=readRDS(filename)
df_cca=cbind(obj@dr$tsne@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca$df_cca[["macaque_id"]]=df_cca
df0$cca$dr_cca[["macaque_id"]]=obj@dr$cca@cell.embeddings
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
cca_use="macaque_idtop2000"
if (! "macaque_id" %in% names(KL_list$cca)){
tmp=data.frame(
macaque_id=BatchKL(df0$cca$df_cca[["macaque_id"]],df0$cca$dr_cca[["macaque_id"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$cca$df_cca[["macaque_id"]],df0$cca$dr_cca[["macaque_id"]],n_cells=100,batch="sample"),
region=BatchKL(df0$cca$df_cca[["macaque_id"]],df0$cca$dr_cca[["macaque_id"]],n_cells=100,batch="region"))
KL_list$cca[["macaque_id"]]=tmp
KL_list$cca[['macaque_id']]$Method="CCA"
KL_list$cca[['macaque_id']]$Group="macaque_id"
}
df_cca[,"res.0.1"]=factor(as.numeric(as.character(df_cca[,"res.0.1"])))
df_cca[,"res"]=df_cca[,"res.0.1"]
df0$cca$df_cca[["macaque_id"]]$res=df_cca$res
ari=mclust::adjustedRandIndex(df_cca[,'cluster'],df_cca[,"res.0.1"])
pcca_m_1=getplot(df_cca,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca_m_2=getplot(df_cca,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca_m_3=getplot(df_cca,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca_m_4=getplot(df_cca,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca_m_5=getplot(df_cca,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
sampleID
, top 2000
batch_id="sample"
filename=paste0(datadirpath,"/cca_result/","obj_cca_",batch_id,"_2000_False.rds")
obj=readRDS(filename)
df_cca=cbind(obj@dr$tsne@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca$df_cca[["sample"]]=df_cca
df0$cca$dr_cca[["sample"]]=obj@dr$cca@cell.embeddings
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
cca_use="sampletop2000"
if (! "sample" %in% names(KL_list$cca)){
tmp=data.frame(
macaque_id=BatchKL(df0$cca$df_cca[["sample"]],df0$cca$dr_cca[["sample"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$cca$df_cca[["sample"]],df0$cca$dr_cca[["sample"]],n_cells=100,batch="sample"),
region=BatchKL(df0$cca$df_cca[["sample"]],df0$cca$dr_cca[["sample"]],n_cells=100,batch="region"))
KL_list$cca[["sample"]]=tmp
KL_list$cca[['sample']]$Method="CCA"
KL_list$cca[['sample']]$Group="sample"
}
df_cca[,"res.0.1"]=factor(as.numeric(as.character(df_cca[,"res.0.1"])))
df_cca[,"res"]=df_cca[,"res.0.1"]
df0$cca$df_cca[["sample"]]$res=df_cca$res
ari=mclust::adjustedRandIndex(df_cca[,'cluster'],df_cca[,"res"])
pcca_s_1=getplot(df_cca,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca_s_2=getplot(df_cca,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca_s_3=getplot(df_cca,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca_s_4=getplot(df_cca,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca_s_5=getplot(df_cca,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
region
, top 2000
batch_id="region"
filename=paste0(datadirpath,"/cca_result/","obj_cca_",batch_id,"_2000_False.rds")
obj=readRDS(filename)
df_cca=cbind(obj@dr$tsne@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca$df_cca[["region"]]=df_cca
df0$cca$dr_cca[["region"]]=obj@dr$cca@cell.embeddings
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
cca_use="regiontop2000"
if (! "region" %in% names(KL_list$cca)){
tmp=data.frame(
macaque_id=BatchKL(df0$cca$df_cca[["region"]],df0$cca$dr_cca[["region"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$cca$df_cca[["region"]],df0$cca$dr_cca[["region"]],n_cells=100,batch="sample"),
region=BatchKL(df0$cca$df_cca[["region"]],df0$cca$dr_cca[["region"]],n_cells=100,batch="region"))
KL_list$cca[["region"]]=tmp
KL_list$cca[['region']]$Method="CCA"
KL_list$cca[['region']]$Group="region"
}
df_cca[,"res.0.2"]=factor(as.numeric(as.character(df_cca[,"res.0.2"])))
df_cca[,"res"]=df_cca[,"res.0.2"]
df0$cca$df_cca[["region"]]$res=df_cca$res
ari=mclust::adjustedRandIndex(df_cca[,'cluster'],df_cca[,"res"])
pcca_r_1=getplot(df_cca,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca_r_2=getplot(df_cca,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca_r_3=getplot(df_cca,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca_r_4=getplot(df_cca,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca_r_5=getplot(df_cca,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
MNN
macaque_id
, top 2000
batch_id="macaque_id"
filename=paste0(datadirpath,"/mnn_result/","adata_mnn_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df_mnn=getplotdata(adata,reduction.use = "tsne")
df_mnn$sample=NULL
df_mnn$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$mnn$df_mnn[["macaque_id"]]=df_mnn
df0$mnn$dr_mnn[["macaque_id"]]=py_to_r(adata$obsm["X_pca"])[,c(1:20)]
if (! "macaque_id" %in% names(KL_list$mnn)){
tmp=data.frame(
macaque_id=BatchKL(df0$mnn$df_mnn[["macaque_id"]],df0$mnn$dr_mnn[["macaque_id"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$mnn$df_mnn[["macaque_id"]],df0$mnn$dr_mnn[["macaque_id"]],n_cells=100,batch="sample"),
region=BatchKL(df0$mnn$df_mnn[["macaque_id"]],df0$mnn$dr_mnn[["macaque_id"]],n_cells=100,batch="region"))
KL_list$mnn[["macaque_id"]]=tmp
KL_list$mnn[['macaque_id']]$Method="MNN"
KL_list$mnn[['macaque_id']]$Group="macaque_id"
}
df_mnn[,"res.0.1"]=factor(as.numeric(as.character(df_mnn[,"louvain_ori0.1"])))
df_mnn[,"res"]=df_mnn[,"res.0.1"]
df0$mnn$df_mnn[["macaque_id"]]$res=df_mnn$res
ari=mclust::adjustedRandIndex(df_mnn[,'cluster'],df_mnn[,"res"])
pmnn_m_1=getplot(df_mnn,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pmnn_m_2=getplot(df_mnn,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pmnn_m_3=getplot(df_mnn,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pmnn_m_4=getplot(df_mnn,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pmnn_m_5=getplot(df_mnn,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
sample
, top 2000
batch_id="sample"
filename=paste0(datadirpath,"/mnn_result/","adata_mnn_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df_mnn=getplotdata(adata,reduction.use = "tsne")
df_mnn$sample=NULL
df_mnn$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$mnn$df_mnn[["sample"]]=df_mnn
df0$mnn$dr_mnn[["sample"]]=py_to_r(adata$obsm["X_pca"])[,c(1:20)]
if (! "sample" %in% names(KL_list$mnn)){
tmp=data.frame(
macaque_id=BatchKL(df0$mnn$df_mnn[["sample"]],df0$cca$dr_mnn[["sample"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$mnn$df_mnn[["sample"]],df0$mnn$dr_mnn[["sample"]],n_cells=100,batch="sample"),
region=BatchKL(df0$mnn$df_mnn[["sample"]],df0$mnn$dr_mnn[["sample"]],n_cells=100,batch="region"))
KL_list$mnn[["sample"]]=tmp
KL_list$mnn[['sample']]$Method="MNN"
KL_list$mnn[['sample']]$Group="sample"
}
df_mnn[,"res.0.4"]=factor(as.numeric(as.character(df_mnn[,"louvain_ori0.4"])))
df_mnn[,"res"]=df_mnn[,"res.0.4"]
df0$cca$df_mnn[["sample"]]$res=df_mnn$res
ari=mclust::adjustedRandIndex(df_mnn[,'cluster'],df_mnn[,"res"])
pmnn_s_1=getplot(df_mnn,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pmnn_s_2=getplot(df_mnn,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pmnn_s_3=getplot(df_mnn,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pmnn_s_4=getplot(df_mnn,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pmnn_s_5=getplot(df_mnn,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
region
, top 2000
batch_id="region"
filename=paste0(datadirpath,"/mnn_result/","adata_mnn_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df_mnn=getplotdata(adata,reduction.use = "tsne")
df_mnn$sample=NULL
df_mnn$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$mnn$df_mnn[["region"]]=df_mnn
df0$mnn$dr_mnn[["region"]]=py_to_r(adata$obsm["X_pca"])[,c(1:20)]
if (! "region" %in% names(KL_list$mnn)){
tmp=data.frame(
macaque_id=BatchKL(df0$mnn$df_mnn[["region"]],df0$cca$dr_mnn[["region"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$mnn$df_mnn[["region"]],df0$mnn$dr_mnn[["region"]],n_cells=100,batch="sample"),
region=BatchKL(df0$mnn$df_mnn[["region"]],df0$mnn$dr_mnn[["region"]],n_cells=100,batch="region"))
KL_list$mnn[["region"]]=tmp
KL_list$mnn[['region']]$Method="MNN"
KL_list$mnn[['region']]$Group="region"
}
df_mnn[,"res.0.1"]=factor(as.numeric(as.character(df_mnn[,"louvain_ori0.1"])))
df_mnn[,"res"]=df_mnn[,"res.0.1"]
df0$cca$df_mnn[["sample"]]$res=df_mnn$res
ari=mclust::adjustedRandIndex(df_mnn[,'cluster'],df_mnn[,"res"])
pmnn_r_1=getplot(df_mnn,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pmnn_r_2=getplot(df_mnn,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pmnn_r_3=getplot(df_mnn,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pmnn_r_4=getplot(df_mnn,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pmnn_r_5=getplot(df_mnn,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
scVI
scVI-scaling across all cells (top 2000 genes)
batch_id="nobatch"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata,reduction.use = "tsne")
df_scvi$sample=NULL
df_scvi$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$scvi$df_scvi[["all"]]=df_scvi
df0$scvi$dr_scvi[["all"]]=py_to_r(adata$X)
if (! "all" %in% names(KL_list$scvi)){
tmp=data.frame(
macaque_id=BatchKL(df0$scvi$df_scvi[["all"]],df0$scvi$dr_scvi[["all"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$scvi$df_scvi[["all"]],df0$scvi$dr_scvi[["all"]],n_cells=100,batch="sample"),
region=BatchKL(df0$scvi$df_scvi[["all"]],df0$scvi$dr_scvi[["all"]],n_cells=100,batch="region"))
KL_list$scvi[["all"]]=tmp
KL_list$scvi[['all']]$Method="scVI"
KL_list$scvi[['all']]$Group="All"
}
df_scvi[,"res"]=factor(as.numeric(as.character(df_scvi[,"louvain_ori0.05"])))
df0$scvi$df_scvi[["macaque_id"]]$res=df_scvi$res
ari=mclust::adjustedRandIndex(df_scvi[,'cluster'],df_scvi[,"res"])
pscvi_1=getplot(df_scvi,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscvi_2=getplot(df_scvi,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscvi_3=getplot(df_scvi,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscvi_4=getplot(df_scvi,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscvi_5=getplot(df_scvi,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
macaque_id
, top 2000
batch_id="macaque_id"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata,reduction.use = "tsne")
df_scvi$sample=NULL
df_scvi$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$scvi$df_scvi[["macaque_id"]]=df_scvi
df0$scvi$dr_scvi[["macaque_id"]]=py_to_r(adata$X)
if (! "macaque_id" %in% names(KL_list$scvi)){
tmp=data.frame(
macaque_id=BatchKL(df0$scvi$df_scvi[["macaque_id"]],df0$scvi$dr_scvi[["macaque_id"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$scvi$df_scvi[["macaque_id"]],df0$scvi$dr_scvi[["macaque_id"]],n_cells=100,batch="sample"),
region=BatchKL(df0$scvi$df_scvi[["macaque_id"]],df0$scvi$dr_scvi[["macaque_id"]],n_cells=100,batch="region"))
KL_list$scvi[["macaque_id"]]=tmp
KL_list$scvi[['macaque_id']]$Method="scVI"
KL_list$scvi[['macaque_id']]$Group="macaque_id"
}
df_scvi[,"res"]=factor(as.numeric(as.character(df_scvi[,"louvain_ori0.2"])))
df0$scvi$df_scvi[["macaque_id"]]$res=df_scvi$res
ari=mclust::adjustedRandIndex(df_scvi[,'cluster'],df_scvi[,"res"])
pscvi_m_1=getplot(df_scvi,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscvi_m_2=getplot(df_scvi,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscvi_m_3=getplot(df_scvi,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscvi_m_4=getplot(df_scvi,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscvi_m_5=getplot(df_scvi,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
sample
, top 2000
batch_id="sample"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata,reduction.use = "tsne")
df_scvi$sample=NULL
df_scvi$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$scvi$df_scvi[["sample"]]=df_scvi
df0$scvi$dr_scvi[["sample"]]=py_to_r(adata$X)
if (! "sample" %in% names(KL_list$scvi)){
tmp=data.frame(
macaque_id=BatchKL(df0$scvi$df_scvi[["sample"]],df0$scvi$dr_scvi[["sample"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$scvi$df_scvi[["sample"]],df0$scvi$dr_scvi[["sample"]],n_cells=100,batch="sample"),
region=BatchKL(df0$scvi$df_scvi[["sample"]],df0$scvi$dr_scvi[["sample"]],n_cells=100,batch="region"))
KL_list$scvi[["sample"]]=tmp
KL_list$scvi[['sample']]$Method="scVI"
KL_list$scvi[['sample']]$Group="sample"
}
df_scvi[,"res"]=factor(as.numeric(as.character(df_scvi[,"louvain_ori0.4"])))
df0$scvi$df_scvi[["sample"]]$res=df_scvi$res
ari=mclust::adjustedRandIndex(df_scvi[,'cluster'],df_scvi[,"res"])
pscvi_s_1=getplot(df_scvi,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscvi_s_2=getplot(df_scvi,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscvi_s_3=getplot(df_scvi,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscvi_s_4=getplot(df_scvi,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscvi_s_5=getplot(df_scvi,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
region
, top 2000
batch_id="region"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata,reduction.use = "tsne")
df_scvi$sample=NULL
df_scvi$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$scvi$df_scvi[["region"]]=df_scvi
df0$scvi$dr_scvi[["region"]]=py_to_r(adata$X)
if (! "region" %in% names(KL_list$scvi)){
tmp=data.frame(
macaque_id=BatchKL(df0$scvi$df_scvi[["region"]],df0$scvi$dr_scvi[["region"]],n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$scvi$df_scvi[["region"]],df0$scvi$dr_scvi[["region"]],n_cells=100,batch="sample"),
region=BatchKL(df0$scvi$df_scvi[["region"]],df0$scvi$dr_scvi[["region"]],n_cells=100,batch="region"))
KL_list$scvi[["region"]]=tmp
KL_list$scvi[['region']]$Method="scVI"
KL_list$scvi[['region']]$Group="region"
}
df_scvi[,"res"]=factor(as.numeric(as.character(df_scvi[,"louvain_ori0.1"])))
df0$scvi$df_scvi[["sample"]]$res=df_scvi$res
ari=mclust::adjustedRandIndex(df_scvi[,'cluster'],df_scvi[,"res"])
pscvi_r_1=getplot(df_scvi,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscvi_r_2=getplot(df_scvi,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscvi_r_3=getplot(df_scvi,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscvi_r_4=getplot(df_scvi,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscvi_r_5=getplot(df_scvi,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
BERMUDA
BERMUDA0.90
macaque_id
batch_id="macaque_id"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.9_macaque_id.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
df0$BERMUDA0.90$df_bermuda[["macaque_id"]]=df_bermuda
df0$BERMUDA0.90$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "macaque_id" %in% names(KL_list$BERMUDA0.90)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.90$df_bermuda[["macaque_id"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$BERMUDA0.90$df_bermuda[["macaque_id"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="sample"),
region=BatchKL(df0$BERMUDA0.90$df_bermuda[["macaque_id"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.90[["macaque_id"]]=tmp
KL_list$BERMUDA0.90[['macaque_id']]$Method="BERMUDA"
KL_list$BERMUDA0.90[['macaque_id']]$Group="macaque_id"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.5"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.5"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.5"]
#df_bermuda0.90=df_bermuda
df0$BERMUDA$df_bermuda[["macaque_id"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.90_m_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.90_m_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.90_m_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.90_m_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.90_m_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(pbermuda0.90_m_1,pbermuda0.90_m_2,pbermuda0.90_m_3,pbermuda0.90_m_4,pbermuda0.90_m_5,ncol=3,draw = F)
sample
batch_id="sample"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.9_sample.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
#df_bermuda$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$BERMUDA0.90$df_bermuda[["sample"]]=df_bermuda
df0$BERMUDA0.90$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "sample" %in% names(KL_list$BERMUDA0.90)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.90$df_bermuda[["sample"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$BERMUDA0.90$df_bermuda[["sample"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="sample"),
region=BatchKL(df0$BERMUDA0.90$df_bermuda[["sample"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.90[["sample"]]=tmp
KL_list$BERMUDA0.90[['sample']]$Method="BERMUDA"
KL_list$BERMUDA0.90[['sample']]$Group="sample"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.18"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.18"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.18"]
#df_bermuda0.90=df_bermuda
df0$BERMUDA$df_bermuda[["sample"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.90_s_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.90_s_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.90_s_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.90_s_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.90_s_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
region
batch_id="region"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.9_region.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
#df_bermuda$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$BERMUDA0.90$df_bermuda[["region"]]=df_bermuda
df0$BERMUDA0.90$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "region" %in% names(KL_list$BERMUDA0.90)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.90$df_bermuda[["region"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$BERMUDA0.90$df_bermuda[["region"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="sample"),
region=BatchKL(df0$BERMUDA0.90$df_bermuda[["region"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.90[["region"]]=tmp
KL_list$BERMUDA0.90[['region']]$Method="BERMUDA"
KL_list$BERMUDA0.90[['region']]$Group="region"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.05"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.05"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.05"]
#df_bermuda0.90=df_bermuda
df0$BERMUDA$df_bermuda[["region"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.90_r_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.90_r_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.90_r_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.90_r_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.90_r_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
BERMUDA0.85
macaque_id
batch_id="macaque_id"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.85_macaque_id.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
df0$BERMUDA0.85$df_bermuda[["macaque_id"]]=df_bermuda
df0$BERMUDA0.85$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "macaque_id" %in% names(KL_list$BERMUDA0.85)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.85$df_bermuda[["macaque_id"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$BERMUDA0.85$df_bermuda[["macaque_id"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="sample"),
region=BatchKL(df0$BERMUDA0.85$df_bermuda[["macaque_id"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.85[["macaque_id"]]=tmp
KL_list$BERMUDA0.85[['macaque_id']]$Method="BERMUDA0.85"
KL_list$BERMUDA0.85[['macaque_id']]$Group="macaque_id"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.1"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.1"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.1"]
#df_bermuda0.85=df_bermuda
df0$BERMUDA$df_bermuda[["macaque_id"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.85_m_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.85_m_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.85_m_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.85_m_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.85_m_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(pbermuda0.85_m_1,pbermuda0.85_m_2,pbermuda0.85_m_3,pbermuda0.85_m_4,pbermuda0.85_m_5,ncol=3,draw = F)
sample
batch_id="sample"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.85_sample.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
#df_bermuda$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$BERMUDA0.85$df_bermuda[["sample"]]=df_bermuda
df0$BERMUDA0.85$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "sample" %in% names(KL_list$BERMUDA0.85)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.85$df_bermuda[["sample"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$BERMUDA0.85$df_bermuda[["sample"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="sample"),
region=BatchKL(df0$BERMUDA0.85$df_bermuda[["sample"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.85[["sample"]]=tmp
KL_list$BERMUDA0.85[['sample']]$Method="BERMUDA0.85"
KL_list$BERMUDA0.85[['sample']]$Group="sample"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.011"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.011"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.011"]
#df_bermuda0.85=df_bermuda
df0$BERMUDA$df_bermuda[["sample"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.85_s_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.85_s_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.85_s_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.85_s_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.85_s_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
region
batch_id="region"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.85_region.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
#df_bermuda$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$BERMUDA0.85$df_bermuda[["region"]]=df_bermuda
df0$BERMUDA0.85$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "region" %in% names(KL_list$BERMUDA0.85)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.85$df_bermuda[["region"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$BERMUDA0.85$df_bermuda[["region"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="sample"),
region=BatchKL(df0$BERMUDA0.85$df_bermuda[["region"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.85[["region"]]=tmp
KL_list$BERMUDA0.85[['region']]$Method="BERMUDA0.85"
KL_list$BERMUDA0.85[['region']]$Group="region"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.2"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.2"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.2"]
#df_bermuda0.85=df_bermuda
df0$BERMUDA$df_bermuda[["region"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.85_r_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.85_r_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.85_r_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.85_r_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.85_r_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
scanorama
macaque_id
batch_id="macaque_id"
filename=paste0(datadirpath,"/scanorama_result/adata_scanorama_macaque_id_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scanorama=getplotdata(adata,reduction.use = "tsne")
df_scanorama$sample=NULL
df_scanorama$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$scanorama$df_scanorama[["macaque_id"]]=df_scanorama
df0$scanorama$dr_scanorama=py_to_r(adata$obsm["X_scanorama"])[,c(1:100)]
if (! "macaque_id" %in% names(KL_list$scanorama)){
tmp=data.frame(macaque_id=BatchKL(df0$scanorama$df_scanorama[["macaque_id"]],df0$scanorama$dr_scanorama,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$scanorama$df_scanorama[["macaque_id"]],df0$scanorama$dr_scanorama,n_cells=100,batch="sample"),
region=BatchKL(df0$scanorama$df_scanorama[["macaque_id"]],df0$scanorama$dr_scanorama,n_cells=100,batch="region"))
KL_list$scanorama[["macaque_id"]]=tmp
KL_list$scanorama[['macaque_id']]$Method="scanorama"
KL_list$scanorama[['macaque_id']]$Group="macaque_id"
}
id0=match(rownames(df_desc),df_scanorama$cellname)
df_scanorama[,"louvain_0.6"]=factor(as.numeric(as.character(df_scanorama[,"louvain_0.6"])))
df_scanorama[,"res"]=df_scanorama[,"louvain_0.6"]
df_scanorama=df_scanorama
df0$scanorama$df_scanorama[["macaque_id"]]$res=df_scanorama$res
ari=mclust::adjustedRandIndex(df_scanorama[,'cluster'],df_scanorama[,"res"])
pscanorama_m_1=getplot(df_scanorama,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscanorama_m_2=getplot(df_scanorama,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscanorama_m_3=getplot(df_scanorama,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscanorama_m_4=getplot(df_scanorama,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscanorama_m_5=getplot(df_scanorama,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
sample
batch_id="sample"
filename=paste0(datadirpath,"/scanorama_result/adata_scanorama_sample_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scanorama=getplotdata(adata,reduction.use = "tsne")
df_scanorama$sample=NULL
df_scanorama$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$scanorama$df_scanorama[["sample"]]=df_scanorama
df0$scanorama$dr_scanorama=py_to_r(adata$obsm["X_scanorama"])[,c(1:100)]
if (! "sample" %in% names(KL_list$scanorama)){
tmp=data.frame(macaque_id=BatchKL(df0$scanorama$df_scanorama[["sample"]],df0$scanorama$dr_scanorama,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$scanorama$df_scanorama[["sample"]],df0$scanorama$dr_scanorama,n_cells=100,batch="sample"),
region=BatchKL(df0$scanorama$df_scanorama[["sample"]],df0$scanorama$dr_scanorama,n_cells=100,batch="region"))
KL_list$scanorama[["macaque_id"]]=tmp
KL_list$scanorama[['macaque_id']]$Method="scanorama"
KL_list$scanorama[['macaque_id']]$Group="sample"
}
id0=match(rownames(df_desc),df_scanorama$cellname)
df_scanorama[,"louvain_0.6"]=factor(as.numeric(as.character(df_scanorama[,"louvain_0.6"])))
df_scanorama[,"res"]=df_scanorama[,"louvain_0.6"]
df_scanorama=df_scanorama
df0$scanorama$df_scanorama[["sample"]]$res=df_scanorama$res
ari=mclust::adjustedRandIndex(df_scanorama[,'cluster'],df_scanorama[,"res"])
pscanorama_s_1=getplot(df_scanorama,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscanorama_s_2=getplot(df_scanorama,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscanorama_s_3=getplot(df_scanorama,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscanorama_s_4=getplot(df_scanorama,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscanorama_s_5=getplot(df_scanorama,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)
region
batch_id="region"
filename=paste0(datadirpath,"/scanorama_result/adata_scanorama_region_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scanorama=getplotdata(adata,reduction.use = "tsne")
df_scanorama$sample=NULL
df_scanorama$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$scanorama$df_scanorama[["region"]]=df_scanorama
df0$scanorama$dr_scanorama=py_to_r(adata$obsm["X_scanorama"])[,c(1:100)]
if (! "region" %in% names(KL_list$scanorama)){
tmp=data.frame(macaque_id=BatchKL(df0$scanorama$df_scanorama[["region"]],df0$scanorama$dr_scanorama,n_cells=100,batch="macaque_id"),
sample=BatchKL(df0$scanorama$df_scanorama[["region"]],df0$scanorama$dr_scanorama,n_cells=100,batch="sample"),
region=BatchKL(df0$scanorama$df_scanorama[["region"]],df0$scanorama$dr_scanorama,n_cells=100,batch="region"))
KL_list$scanorama[["region"]]=tmp
KL_list$scanorama[['region']]$Method="scanorama"
KL_list$scanorama[['region']]$Group="region"
}
id0=match(rownames(df_desc),df_scanorama$cellname)
df_scanorama[,"louvain_0.4"]=factor(as.numeric(as.character(df_scanorama[,"louvain_0.4"])))
df_scanorama[,"res"]=df_scanorama[,"louvain_0.4"]
df_scanorama=df_scanorama
df0$scanorama$df_scanorama[["region"]]$res=df_scanorama$res
ari=mclust::adjustedRandIndex(df_scanorama[,'cluster'],df_scanorama[,"res"])
pscanorama_r_1=getplot(df_scanorama,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscanorama_r_2=getplot(df_scanorama,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscanorama_r_3=getplot(df_scanorama,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscanorama_r_4=getplot(df_scanorama,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscanorama_r_5=getplot(df_scanorama,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
## save results for downstreaming plot, which only can be seen myself.
#save(KL_list,df0,file = "KL_list.RData")
#save.image(file="all.RData")
Figure 1
ari_value=c(mclust::adjustedRandIndex(df0$desc$df_desc$all$desc_0.1,df0$desc$df_desc$all$cluster),
mclust::adjustedRandIndex(df0$desc$df_desc$macaque_id$desc_0.1,df0$desc$df_desc$macaque_id$cluster),
mclust::adjustedRandIndex(df0$desc$df_desc$sample$desc_0.3,df0$desc$df_desc$sample$cluster),
mclust::adjustedRandIndex(df0$desc$df_desc$region$desc_0.1,df0$desc$df_desc$region$cluster),
#cca3.0
mclust::adjustedRandIndex(df0$cca3.0$df_cca3.0$macaque_id$integrated_snn_res.0.1,df0$cca3.0$df_cca3.0$macaque_id$cluster),
mclust::adjustedRandIndex(df0$cca3.0$df_cca3.0$sample$integrated_snn_res.0.3,df0$cca3.0$df_cca3.0$sample$cluster),
mclust::adjustedRandIndex(df0$cca3.0$df_cca3.0$region$integrated_snn_res.0.01,df0$cca3.0$df_cca3.0$region$cluster),
#cca
mclust::adjustedRandIndex(df0$cca$df_cca$macaque_id$res.0.1,df0$cca$df_cca$macaque_id$cluster),
mclust::adjustedRandIndex(df0$cca$df_cca$sample$res.0.1,df0$cca$df_cca$sample$cluster),
mclust::adjustedRandIndex(df0$cca$df_cca$region$res.0.2,df0$cca$df_cca$region$cluster),
#mnn
mclust::adjustedRandIndex(df0$mnn$df_mnn$macaque_id$louvain_ori0.1,df0$mnn$df_mnn$macaque_id$cluster),
mclust::adjustedRandIndex(df0$mnn$df_mnn$sample$louvain_ori0.4,df0$mnn$df_mnn$sample$cluster),
mclust::adjustedRandIndex(df0$mnn$df_mnn$region$louvain_ori0.1,df0$mnn$df_mnn$region$cluster),
#scvi
mclust::adjustedRandIndex(df0$scvi$df_scvi$all$louvain_ori0.05,df0$scvi$df_scvi$all$cluster),
mclust::adjustedRandIndex(df0$scvi$df_scvi$macaque_id$louvain_ori0.2,df0$scvi$df_scvi$macaque_id$cluster),
mclust::adjustedRandIndex(df0$scvi$df_scvi$sample$louvain_ori0.4,df0$scvi$df_scvi$sample$cluster),
mclust::adjustedRandIndex(df0$scvi$df_scvi$region$louvain_ori0.1,df0$scvi$df_scvi$region$cluster),
#BERMUDA0.90
mclust::adjustedRandIndex(df0$BERMUDA0.90$df_bermuda$macaque_id$louvain_0.5,df0$BERMUDA0.90$df_bermuda$macaque_id$cluster),
mclust::adjustedRandIndex(df0$BERMUDA0.90$df_bermuda$sample$louvain_0.18,df0$BERMUDA0.90$df_bermuda$sample$cluster),
mclust::adjustedRandIndex(df0$BERMUDA0.90$df_bermuda$region$louvain_0.05,df0$BERMUDA0.90$df_bermuda$region$cluster),
#BERMUDA0.85
mclust::adjustedRandIndex(df0$BERMUDA0.85$df_bermuda$macaque_id$louvain_0.1,df0$BERMUDA0.85$df_bermuda$macaque_id$cluster),
mclust::adjustedRandIndex(df0$BERMUDA0.85$df_bermuda$sample$louvain_0.011,df0$BERMUDA0.85$df_bermuda$sample$cluster),
mclust::adjustedRandIndex(df0$BERMUDA0.85$df_bermuda$region$louvain_0.2,df0$BERMUDA0.85$df_bermuda$region$cluster),
#scanorama
mclust::adjustedRandIndex(df0$scanorama$df_scanorama$macaque_id$louvain_0.6,df0$scanorama$df_scanorama$macaque_id$cluster),
mclust::adjustedRandIndex(df0$scanorama$df_scanorama$sample$louvain_0.6,df0$scanorama$df_scanorama$sample$cluster),
mclust::adjustedRandIndex(df0$scanorama$df_scanorama$region$louvain_0.4,df0$scanorama$df_scanorama$region$cluster)
)
#ARI
ARI_data=data.frame(ARI=ari_value,
UseBatchID=c("All",rep(c("macaque_id","sample","region"),times=4),
c("All","macaque_id","sample","region"),rep(c("macaque_id","sample","region"),times=3)),
Method=c("DESC",rep(c("DESC","Seurat3.0","CCA","MNN"),each=3),rep("scVI",times=4),
rep("BERMUDA0.90",time=3),rep("BERMUDA0.85",times=3),rep("scanorama",times=3)))
ARI_data$Method=factor(ARI_data$Method,levels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA0.90","BERMUDA0.85","scanorama"))
p_ari=ggplot(data=ARI_data,aes(x=factor(Method),y=ARI,fill=UseBatchID,width=0.85))+
geom_bar(position= position_dodge(width = 0.8,preserve = "single"),stat="identity")+
xlab("")+
ylab("Adjusted Rand Index")+
scale_y_continuous(expand = c(0,0),breaks=c(0,0.25,0.5,0.75,1.0),labels=c("0.0","0.25","0.50","0.75","1.00"),limits=c(0,1.02))+
theme(axis.text.x = element_text(angle=60,hjust=1))+
ggtitle("")+
#scale_fill_manual(values=colors_use)+
theme(legend.key.size = unit(0.6,"cm"))
#ARI
ARI_data=data.frame(ARI=ari_value,
UseBatchID=c("All",rep(c("macaque_id","sample","region"),times=4),
c("All","macaque_id","sample","region"),rep(c("macaque_id","sample","region"),times=3)),
Method=c("DESC",rep(c("DESC","Seurat3.0","CCA","MNN"),each=3),rep("scVI",times=4),
rep("BERMUDA0.90",time=3),rep("BERMUDA0.85",times=3),rep("scanorama",times=3)))
ARI_data=subset(ARI_data,!Method%in%c("BERMUDA0.85"))
ARI_data$Method=factor(ARI_data$Method,levels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA0.90","scanorama"),
labels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA","scanorama"))
p_ari=ggplot(data=ARI_data,aes(x=factor(Method),y=ARI,fill=UseBatchID,width=0.85))+
geom_bar(position= position_dodge(width = 0.8,preserve = "single"),stat="identity")+
xlab("")+
ylab("Adjusted Rand Index")+
scale_y_continuous(expand = c(0,0),breaks=c(0,0.25,0.5,0.75,1.0),labels=c("0.0","0.25","0.50","0.75","1.00"),limits=c(0,1.02))+
theme(axis.text.x = element_text(angle=60,hjust=1))+
ggtitle("")+
#scale_fill_manual(values=colors_use)+
theme(legend.key.size = unit(0.6,"cm"))
theme_use=theme(legend.key.size = unit(1,"cm"),
legend.key.width = unit(0,'cm'),
legend.text = element_text(size=16),
legend.title = element_text(size=20),
legend.key.height = unit(1.0,"cm"),
axis.text.x = element_blank(),
axis.text.y = element_blank())
theme_use2=theme(legend.key.size = unit(0.5,"cm"),
legend.text = element_text(size=8),
legend.key.height = unit(0.5,"cm"),
legend.title = element_text(size=12),
axis.text.x = element_blank(),
axis.text.y = element_blank())
theme_use_feature=theme(legend.justification=0.5,
legend.key.size = unit(1,"cm"),
legend.text = element_text(size=25),
legend.title = element_text(size=30),
legend.key.height = unit(1.3,"cm"),
axis.text.x = element_blank(),
axis.text.y=element_blank())
#pp1=egg::ggarrange(pdesc_s_3+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "bottom"),
# pdesc_s_5+theme_use2+ggtitle("DESC")+coord_cartesian()+theme(legend.text = element_text(size=15),legend.key = ),
# pdesc_s_4+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "bottom"),
# ncol=2,draw=F)
p1b=pdesc_s_3+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "bottom")
p1d= pdesc_s_4+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "bottom")
p1c= pdesc_s_5+theme_use2+ggtitle("DESC")+coord_cartesian()+theme(legend.text = element_text(size=15))
p0=p_ari+theme(legend.text = element_text(size=15),
#legend.key.size = unit(0.5,"cm"),
#legend.key.height = unit(0.5,"cm"),
legend.title = element_blank(),
axis.text.x = element_text(angle = 30, size = 15, hjust = 1,color="black"),
axis.text.y = element_text(angle = 0, vjust = 0, size = 15, hjust = 0.5,color="black"),
axis.title.x = element_text(angle = 0, vjust = 1, size = 15, hjust = 0.5),
axis.title.y = element_text(angle = 90, vjust = 1, size = 15, hjust = 0.5),
plot.title = element_text(hjust=0.5,size=12,face="bold",lineheight=0.8,color="black"),
panel.background=element_rect(fill = "white", colour = "grey50"))
fig_height=20.6
workflow=ggdraw()+draw_image(magick::image_read("~/Documents/DESC_paper_prepare/DESC_paper_final/formal_revised/workflow.png"),scale=1)# width/heigth=201/133.4
p_1=ggdraw()+
draw_plot(workflow,x=0.,y=10/fig_height,height=10.59/fig_height,width=1)+
draw_plot(p1b,x=0.01,y=5/fig_height,height = 5/fig_height,width=5.5/16)+
draw_plot(p1c,x=6.2/16,y=5/fig_height+0.035,height = 5/fig_height-0.035,width=9.8/16)+
draw_plot(p1d,x=0.01,y=0,height = 5/fig_height,width=5.5/16)+
draw_plot(p0,x=6.2/16,y=0,height = 5/fig_height-0.015,width=9.5/16)+
draw_label("a",x=0.01,y=1,hjust=0.5,vjust=1,fontface = "bold",size=30)+
draw_label("b",x=0.01,y=10/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=30)+
draw_label("c",x=6.2/16,y=10/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=30)+
draw_label("d",x=0.01,y=5/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=30)+
draw_label("e",x=6.2/16,y=5/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=30)
#Figure 1
ggsave(file.path(fig_path,"Figure.1.tiff"),p_1,width=16,height = 20.6,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.1.pdf"),p_1,width=16,height = 20.6,dpi=300)
ggsave(file.path(fig_path,"Figure.1.eps"),p_1,width=16,height = 20.6,dpi=300)
p_1
Figure 10 (left)
#ARI
suppressPackageStartupMessages(library(dplyr))
color_tmp=c('#B903AA','#1CE6FF','#FF34FF','#FF4A46','#008941','#006FA6','#7A4900','#63FFAC','#B79762','#004D43','#8FB0FF','#997D87','#5A0007','#809693','#FEFFE6','#1B4400','#4FC601','#3B5DFF','#4A3B53','#FF2F80','#61615A','#BA0900','#6B7900','#00C2A0','#FFAA92','#FF90C9','#B903AA')
ARI_data0=ARI_data
tmp0=as.character(ARI_data$Method)
tmp0[tmp0%in%c("BERMUDA0.90","BERMUDA0.85")]="BERMUDA"
ARI_data0$Method=tmp0
zz=ARI_data0%>%group_by(Method)%>%summarise(mean=mean(ARI),sd=sd(ARI))
zz$Method=factor(zz$Method,levels=c("DESC","scVI","scanorama","MNN","BERMUDA","Seurat3.0","CCA"))
color_usetmp=color_tmp[1:7]
names(color_usetmp)=c("DESC","scVI","scanorama","MNN","BERMUDA","Seurat3.0","CCA")
pp=ggplot()+geom_bar(data=zz,aes(x=Method,y=mean,fill=Method),stat="identity",width=0.4)+
geom_point(data=zz,aes(x=Method,y=mean),color="black")+
scale_fill_manual(values=color_usetmp,guide=F)+
geom_errorbar(data=zz,aes(x=Method,ymin=mean-sd,ymax=mean+sd),width=0.2,size=1)+
theme(axis.text.x = element_text(angle=60,hjust=1))+
theme(axis.title.y = element_text(angle=-90,hjust=0.5))+
#scale_y_continuous(expand = c(0,0,0.05,0),labels=scales::comma,position = "left",breaks = c(0,10,20,30,40),limits = c(0,40))+
scale_x_discrete(expand=c(0.1,0.,0.05,0))+
scale_y_continuous(expand = c(0,0),breaks=c(0,0.25,0.5,0.75,1.0),labels=c("0.0","0.25","0.50","0.75","1.00"),limits=c(0,1.1))+
geom_text(aes(x=0.6,y=c(0.25,0.5,0.75,1.0,1.05)-0.02,label=c("0.25","0.50","0.75","1.00","ARI")))+
theme(legend.position="none")+xlab("")+theme_void()+
#geom_hline(aes(yintercept=c(0.25,0.5,0.75,1.0)),linetype="dotted",alpha=1,color="darkgrey")
theme( panel.grid.major.y = element_line(size=0.3,linetype = "dotted"))
#ggsave("~/Documents/DESC_paper_prepare/DESC_paper_v3/figures/figure_main/ari_table.pdf",pp,width = 9,height=3.7,dpi=300)
#Figure 10
ggsave(file.path(fig_path,"Fig.10.left.tiff"),pp,width=9,height = 3.7,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Fig.10.left.pdf"),pp,width=9,height = 3.7,dpi=300)
ggsave(file.path(fig_path,"Fig.10.left.eps"),pp,width=9,height = 3.7,dpi=300)
pp
Figure 2
data_KL0=do.call("rbind",lapply(KL_list,function(x) do.call("rbind",x)))
data_rect=data.frame(xmin=seq(0.5,6.5,by=1),
xmax=seq(1.5,7.5,by=1),
ymin=-Inf,
color=rep(c("#E8F2F4","#8FB0FF"),length=7),
ymax=Inf,stringsAsFactors = F)
p_list_KL=lapply(c("macaque_id","region","sample"),function(zz){
data_KL=subset(data_KL0, !Method%in%"BERMUDA0.85", select=c(zz,"Method","Group"))
df=reshape2::melt(data_KL,id.vars=c("Method","Group"))
#df$Method=factor(df$Method,levels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA0.90","BERMUDA0.85","scanorama"))
df$Method=factor(df$Method,levels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA","scanorama"))
colnames(df)=c("Method","Group","variable","value")
df$useBatch=df$Group
p_KL=ggplot()+
geom_boxplot(data=df,aes(x=Method,y=value,color=useBatch),position= position_dodge(preserve = "single"))+
xlab("")+
scale_x_discrete(expand = c(0,0,0,0))+
#scale_y_continuous(breaks = 4)+
ylab("KL divergence")+
theme_self+theme(
#aspect.ratio = 1,
axis.text.x = element_text(angle = 30,hjust=1,vjust=1),
panel.border =element_blank(),
axis.line = element_line())+
ggtitle(paste0("KL divergence of ", zz))+
geom_rect(data=data_rect,aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax,fill=color),alpha=0.05)+
scale_fill_discrete(guide=F)
return(p_KL)
}
)
p_KL=egg::ggarrange(p_list_KL[[1]]+theme(legend.position = "none"),
p_list_KL[[2]]+theme(legend.position = "none",axis.title.y = element_blank()),
p_list_KL[[3]]+theme(axis.title.y = element_blank()),ncol=3,draw=F)
p_KL
p_legend=get_legend(pdesc_4+theme(legend.position = "top",legend.title = element_blank(),
legend.spacing.x = unit(1.5,"cm"),
legend.text = element_text(size=25))+
guides(colour = guide_legend(override.aes = list(alpha = 1, size=15),nrow=1)))
#pp2=egg::ggarrange(
pp2=list(pdesc_r_4+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "none"),
pcca3.0_r_4+theme_use+ggtitle("Seurat3.0")+coord_cartesian()+theme(legend.position = "none"),
pcca_r_4+theme_use+ggtitle("CCA")+coord_cartesian()+theme(legend.position = "none"),
pmnn_r_4+theme_use+ggtitle("MNN")+coord_cartesian()+theme(legend.position = "none"),
pscvi_r_4+theme_use+ggtitle("scVI")+coord_cartesian()+theme(legend.position = "none"),
#pscvi_4+theme_use+ggtitle("scVI-no batchid")+coord_cartesian()+theme(legend.position = "none"),
pbermuda0.90_r_4+theme_use+ggtitle("BERMUDA")+coord_cartesian()+theme(legend.position = "none"),
#pbermuda0.85_m_4+theme_use+ggtitle("BERMUDA0.85")+coord_cartesian()+theme(legend.position = "none"),
pscanorama_r_4+theme_use+ggtitle("scanorama")+coord_cartesian()+theme(legend.position = "none"),
plot_grid(p_legend))
p2b=egg::ggarrange(plots=pp2,ncol = 3,draw = F)
## adding dummy grobs
#layout=matrix(1:9,3,3,byrow =T)
#ggsave("~/Documents/DESC_paper_prepare/DESC_paper_v3/figures/figure_main/fig2b.tiff",
# grid.arrange(grobs=pp2,layout_matrix=layout),
# width=21,height =17,dpi=200)
fig_height=25
p2=ggdraw()+
draw_plot(p_KL,x=0.01,y=18/fig_height,height=7/fig_height,width=1)+
draw_plot(p2b,x=0.02,y=0,height = 17.5/fig_height,width=0.94)+
draw_label("a",x=0.01,y=1,hjust=0.5,vjust=1,fontface = "bold",size=35)+
draw_label("b",x=0.01,y=17.5/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=35)
## Figure 2
ggsave(file.path(fig_path,"Figure.2.tiff"),p2,width=24,height = 25,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.2.pdf"),p2,width=24,height = 25,dpi=300)
ggsave(file.path(fig_path,"Figure.2.eps"),p2,width=24,height = 25,dpi=300)
p2
Figure 3
- without considering batch definition(DESC vs. scVI)
theme_use=theme(legend.key.size = unit(1,"cm"),
legend.key.width = unit(0,'cm'),
legend.text = element_text(size=16),
legend.title = element_text(size=20),
legend.key.height = unit(1.0,"cm"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.line.x = element_blank(),
axis.line.y=element_blank(),
axis.title.y=element_blank(),
axis.title.x =element_blank())
theme_use2=theme(legend.key.size = unit(0.6,"cm"),
legend.text = element_text(size=10),
legend.key.height = unit(0.6,"cm"),
legend.title = element_text(size=20),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.line.x = element_blank(),
axis.line.y=element_blank(),
axis.title.y=element_blank(),
axis.title.x =element_blank() )
theme_xx= theme(legend.position = "bottom")
p3_tmp=egg::ggarrange(pdesc_2+theme_use2+ylab("DESC")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+theme_xx+
guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
coord_cartesian(),
pdesc_3+theme_use+theme_xx+
guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
coord_cartesian(),
pdesc_4+theme_use+ggtitle("regionID")+theme_xx+
guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
coord_cartesian(),
pdesc_5+theme_use2+theme(plot.title = element_text(color="black",face="bold"))+theme_xx+
guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
coord_cartesian(),
pscvi_2+theme_use2+ylab("scVI")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+theme_xx+
guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
coord_cartesian(),
pscvi_3+theme_use+ggtitle("")+theme_xx+guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
coord_cartesian(),
pscvi_4+theme_use+ggtitle("")+theme_xx+guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
coord_cartesian(),
pscvi_5+theme_use2+ggtitle("")+theme_xx+
guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
coord_cartesian(),
ncol=4,draw=F,
labels = c("a","","","","b","","",""),
label.args = list(gp = grid::gpar(cex =3.5,color="black",face="bold"),just="bottom"))
## Figure 3
ggsave(file.path(fig_path,"Figure.3.tiff"),p3_tmp,width=24,height = 17,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.3.pdf"),p3_tmp,width=24,height = 17,dpi=300)
ggsave(file.path(fig_path,"Figure.3.eps"),p3_tmp,width=24,height = 17,dpi=300)
p3_tmp
Supplementary Figure 1
- Taking macaque_id as batch
p_tmp=egg::ggarrange(
#desc
pdesc_m_2+theme_use2+ylab("DESC")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pdesc_m_3+theme_use+theme(plot.title = element_text(color="darkblue",face="bold"))+coord_cartesian(),
pdesc_m_4+theme_use+ggtitle("regionID")+coord_cartesian(),
pdesc_m_5+theme_use2+coord_cartesian(),
##cca3.0 with nobatch
pcca3.0_m_2+theme_use2+ylab("Seurat3.0")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pcca3.0_m_3+theme_use+ggtitle("")+coord_cartesian(),
pcca3.0_m_4+theme_use+ggtitle("")+coord_cartesian(),
pcca3.0_m_5+theme_use2+ggtitle("")+coord_cartesian(),
##cca
pcca_m_2+theme_use2+ylab("CCA")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pcca_m_3+theme_use+ggtitle("")+coord_cartesian(),
pcca_m_4+theme_use+ggtitle("")+coord_cartesian(),
pcca_m_5+theme_use2+ggtitle("")+coord_cartesian(),
##MNN
pmnn_m_2+theme_use2+ylab("MNN")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pmnn_m_3+theme_use+ggtitle("")+coord_cartesian(),
pmnn_m_4+theme_use+ggtitle("")+coord_cartesian(),
pmnn_m_5+theme_use2+ggtitle("")+coord_cartesian(),
#scvi
pscvi_m_2+theme_use2+ylab("scVI")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pscvi_m_3+theme_use+ggtitle("")+coord_cartesian(),
pscvi_m_4+theme_use+ggtitle("")+coord_cartesian(),
pscvi_m_5+theme_use2+ggtitle("")+coord_cartesian(),
#BERMUDA0.90
pbermuda0.90_m_2+theme_use2+ylab("BERMUDA")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pbermuda0.90_m_3+theme_use+ggtitle("")+coord_cartesian(),
pbermuda0.90_m_4+theme_use+ggtitle("")+coord_cartesian(),
pbermuda0.90_m_5+theme_use2+ggtitle("")+coord_cartesian(),
#BERMUDA0.85
#pbermuda0.85_m_2+theme_use2+ylab("BERMUDA0.85")+ggtitle("")+
# theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
# legend.title = element_blank())+coord_cartesian(),
# pbermuda0.85_m_3+theme_use+ggtitle("")+coord_cartesian(),
# pbermuda0.85_m_4+theme_use+ggtitle("")+coord_cartesian(),
#pbermuda0.85_m_5+theme_use2+ggtitle("")+coord_cartesian(),
#scanorama
pscanorama_m_2+theme_use2+ylab("scanorama")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pscanorama_m_3+theme_use+ggtitle("")+coord_cartesian(),
pscanorama_m_4+theme_use+ggtitle("")+coord_cartesian(),
pscanorama_m_5+theme_use2+ggtitle("")+coord_cartesian(),
ncol=4,draw=F)
## Supplementary Figure 1
ggsave(file.path(fig_path,"Figure.S1.tiff"),p_tmp,width=33,height = 40,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.S1.pdf"),p_tmp,width=33,height = 40,dpi=300)
ggsave(file.path(fig_path,"Figure.S1.eps"),p_tmp,width=33,height = 40,dpi=300)
p_tmp
Supplementary Figure 2
- Taking region as batch
p_tmp=egg::ggarrange(pdesc_r_2+theme_use2+ylab("DESC")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pdesc_r_3+theme_use+coord_cartesian(),
pdesc_r_4+theme_use+ggtitle("regionID")+theme(plot.title = element_text(color="darkblue",face="bold"))+coord_cartesian(),
pdesc_r_5+theme_use2+coord_cartesian(),
pcca3.0_r_2+theme_use2+ylab("Seurat3.0")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pcca3.0_r_3+theme_use+ggtitle("")+coord_cartesian(),
pcca3.0_r_4+theme_use+ggtitle("")+coord_cartesian(),
pcca3.0_r_5+theme_use2+ggtitle("")+coord_cartesian(),
pcca_m_2+theme_use2+ylab("CCA")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pcca_r_3+theme_use+ggtitle("")+coord_cartesian(),
pcca_r_4+theme_use+ggtitle("")+coord_cartesian(),
pcca_r_5+theme_use2+ggtitle("")+coord_cartesian(),
pmnn_r_2+theme_use2+ylab("MNN")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pmnn_r_3+theme_use+ggtitle("")+coord_cartesian(),
pmnn_r_4+theme_use+ggtitle("")+coord_cartesian(),
pmnn_r_5+theme_use2+ggtitle("")+coord_cartesian(),
pscvi_r_2+theme_use2+ylab("scVI")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pscvi_r_3+theme_use+ggtitle("")+coord_cartesian(),
pscvi_r_4+theme_use+ggtitle("")+coord_cartesian(),
pscvi_r_5+theme_use2+ggtitle("")+coord_cartesian(),
#BERMUDA0.90
pbermuda0.90_r_2+theme_use2+ylab("BERMUDA")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pbermuda0.90_r_3+theme_use+ggtitle("")+coord_cartesian(),
pbermuda0.90_r_4+theme_use+ggtitle("")+coord_cartesian(),
pbermuda0.90_r_5+theme_use2+ggtitle("")+coord_cartesian(),
#BERMUDA0.85
#pbermuda0.85_r_2+theme_use2+ylab("BERMUDA0.85")+ggtitle("")+
# theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
# legend.title = element_blank())+coord_cartesian(),
#pbermuda0.85_r_3+theme_use+ggtitle("")+coord_cartesian(),
#pbermuda0.85_r_4+theme_use+ggtitle("")+coord_cartesian(),
#pbermuda0.85_r_5+theme_use2+ggtitle("")+coord_cartesian(),
#scanorama
pscanorama_r_2+theme_use2+ylab("scanorama")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pscanorama_r_3+theme_use+ggtitle("")+coord_cartesian(),
pscanorama_r_4+theme_use+ggtitle("")+coord_cartesian(),
pscanorama_r_5+theme_use2+ggtitle("")+coord_cartesian(),
ncol=4,draw=F)
## Supplementary Figure 2
ggsave(file.path(fig_path,"Figure.S2.tiff"),p_tmp,width=33,height = 40,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.S2.pdf"),p_tmp,width=33,height = 40,dpi=300)
ggsave(file.path(fig_path,"Figure.S2.eps"),p_tmp,width=33,height = 40,dpi=300)
p_tmp
Supplementary Figure 3
-3. Taking sample as batch
p_tmp=egg::ggarrange(pdesc_s_2+theme_use2+ylab("DESC")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pdesc_s_3+theme_use+coord_cartesian(),
pdesc_s_4+theme_use+ggtitle("regionID")+coord_cartesian(),
pdesc_s_5+theme_use2+theme(plot.title = element_text(color="darkblue",face="bold"))+coord_cartesian(),
pcca3.0_s_2+theme_use2+ylab("Seurat3.0")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pcca3.0_s_3+theme_use+ggtitle("")+coord_cartesian(),
pcca3.0_s_4+theme_use+ggtitle("")+coord_cartesian(),
pcca3.0_s_5+theme_use2+ggtitle("")+coord_cartesian(),
pcca_s_2+theme_use2+ylab("CCA")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pcca_s_3+theme_use+ggtitle("")+coord_cartesian(),
pcca_s_4+theme_use+ggtitle("")+coord_cartesian(),
pcca_s_5+theme_use2+ggtitle("")+coord_cartesian(),
pmnn_s_2+theme_use2+ylab("MNN")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pmnn_s_3+theme_use+ggtitle("")+coord_cartesian(),
pmnn_s_4+theme_use+ggtitle("")+coord_cartesian(),
pmnn_s_5+theme_use2+ggtitle("")+coord_cartesian(),
pscvi_s_2+theme_use2+ylab("scVI")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pscvi_s_3+theme_use+ggtitle("")+coord_cartesian(),
pscvi_s_4+theme_use+ggtitle("")+coord_cartesian(),
pscvi_s_5+theme_use2+ggtitle("")+coord_cartesian(),
#BERMUDA0.90
pbermuda0.90_s_2+theme_use2+ylab("BERMUDA")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pbermuda0.90_s_3+theme_use+ggtitle("")+coord_cartesian(),
pbermuda0.90_s_4+theme_use+ggtitle("")+coord_cartesian(),
pbermuda0.90_s_5+theme_use2+ggtitle("")+coord_cartesian(),
#BERMUDA0.85
#pbermuda0.85_s_2+theme_use2+ylab("BERMUDA0.85")+ggtitle("")+
# theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
# legend.title = element_blank())+coord_cartesian(),
#pbermuda0.85_s_3+theme_use+ggtitle("")+coord_cartesian(),
#pbermuda0.85_s_4+theme_use+ggtitle("")+coord_cartesian(),
#pbermuda0.85_s_5+theme_use2+ggtitle("")+coord_cartesian(),
#scanorama
pscanorama_s_2+theme_use2+ylab("scanorama")+ggtitle("")+
theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
legend.title = element_blank())+coord_cartesian(),
pscanorama_s_3+theme_use+ggtitle("")+coord_cartesian(),
pscanorama_s_4+theme_use+ggtitle("")+coord_cartesian(),
pscanorama_s_5+theme_use2+ggtitle("")+coord_cartesian(),
ncol=4,draw=F)
## Supplementary Figure 3
ggsave(file.path(fig_path,"Figure.S3.tiff"),p_tmp,width=33,height = 40,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.S3.pdf"),p_tmp,width=33,height = 40,dpi=300)
ggsave(file.path(fig_path,"Figure.S3.eps"),p_tmp,width=33,height = 40,dpi=300)
p_tmp