Go back to desc homepage

Data Summary

We used the data generated by the following paper Molecular Classification and Comparative Taxonomics of Foveal and Peripheral Cells in Primate Retina. The original paper has 165679 cells, which includ 42020 retinal ganglion cells (RGCs), 36268 non-neuronal (NN) , 30302 bipolar cells (BC), 30236 amacrine cells (AC), 24707 photoreceports (PR) and 2146 horizontal cells(HC), but here we only focus on 30302 BC cells.

options(warn = -1)
.libPaths(c("/home/xiaoxiang/R/x86_64-pc-linux-gnu-library/3.5",.libPaths()))
knitr::opts_chunk$set(echo=T,include = T)
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(png))
suppressPackageStartupMessages(library(RANN))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(vioplot))
suppressPackageStartupMessages(library(RColorBrewer))
datadirpath="/media/xiaoxiang/D/raw_data/DECv.1/monkey_result"
fig_path="/home/xiaoxiang/Documents/DESC_paper_prepare/DESC_paper_final/formal_revised/figures_sep/"
# load necessary func
source("/media/xiaoxiang/D/DESC_reproducible_file/helpfunc_new.R")
old=theme_set(theme_bw()+theme(strip.background = element_rect(fill="white"),
                                         panel.background = element_blank(),
                                         panel.grid =element_blank()))
gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}
op=par(mar=c(5,4,6,4))
color_celltype=c('#1CE6FF','#FF34FF','#FF4A46','#008941','#A30059','#FF2F80','#0000A6','#63FFAC','#004D43','#8FB0FF','#4FC601','#3B5DFF','#4A3B53','#61615A','#BA0900','#6B7900','#00C2A0','#FFAA92','#FF90C9','#B903AA')
image(1:length(color_celltype),1, as.matrix(1:length(color_celltype)),col=color_celltype,ylab="",xlab="",axes=F)
axis(3,at=seq(1:length(color_celltype)),labels=color_celltype,las=2,lwd=0)

op=par(mar=c(5,4,6,4))
col.set.update <- c("#c10023", "#008e17", "#fb8500", "#f60000", "#FE0092", "#bc9000","#4ffc00", "#00bcac", "#0099cc",
                    "#D35400", "#00eefd", "#cf6bd6", "#99cc00", "#aa00ff", "#ff00ff", "#0053c8",
                    "#f2a287","#ffb3ff", "#800000", "#77a7b7", "#00896e", "#00cc99", "#007CC8")
image(1:length(col.set.update),1, as.matrix(1:length(col.set.update)),col=col.set.update,ylab="",xlab="",axes=F)
axis(3,at=seq(1:length(col.set.update)),labels=col.set.update,las=2,lwd=0)

The Clustering Result

DESC

For our method DESC, we tried four different situations.

  1. scaling across all cells,
  2. scaling by macaque_id
  3. scaling by sample
  4. scaling by region
df0=list()
df0$desc=list()
df0$cca=list()
df0$cca3.0=list()
df0$mnn=list()
df0$scVI=list()
df0$saucie=list()
df0$BERMUDA=list()
df0$scanorama=list()
KL_list=list()
KL_list$desc=list()
KL_list$cca=list()
KL_list$cca3.0=list()
KL_list$mnn=list()
KL_list$scvi=list()
KL_list$saucie=list()
KL_list$BERMUDA0.90=list()
KL_list$BERMUDA0.85=list()
KL_list$scanorama=list()
#if (file.exists("df0.RData")) load("df0.RData")

DESC–scaling across all cells

resolution_use="0.1"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
dataset="desc_results_monkeyRetina"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$sample=NULL
df_desc$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$desc$df_desc[["all"]]=df_desc
df0$desc$dr_desc[["all"]]=py_to_r(adata$obsm[[obsm_use]]) #just for KL
if (! "all" %in% names(KL_list$desc)){
tmp=data.frame(macaque_id=BatchKL(df_desc,df0$desc$dr_desc[["all"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df_desc,df0$desc$dr_desc[["all"]],n_cells=100,batch="sample"),
               region=BatchKL(df_desc,df0$desc$dr_desc[["all"]],n_cells=100,batch="region"))
KL_list$desc[["all"]]=tmp
KL_list$desc[['all']]$Method="DESC"
KL_list$desc[['all']]$Group="All"
}
df_desc[,desc_use]=factor(as.numeric(as.character(df_desc[,desc_use])))
ari=mclust::adjustedRandIndex(df_desc[,'cluster'],df_desc[,desc_use])
print(ari)
pdesc_1=getplot(df_desc,by.group = desc_use,ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)
pdesc_2=getplot(df_desc,by.group = "cluster",ggtitle0 = "Celltype",pt.size = 0.2)
pdesc_3=getplot(df_desc,by.group = "macaque_id",ggtitle0 = "macaqueID",pt.size = 0.2)
pdesc_4=getplot(df_desc,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pdesc_5=getplot(df_desc,by.group = "sample",ggtitle0 = "sampleID",pt.size = 0.2)
#p0=egg::ggarrange(pdesc_1,pdesc_2,pdesc_5,pdesc_4,pdesc_3,pdesc_KL,ncol=3,draw = F)

Note dr stands for that we compute KL divergence using the output of the embedded layer (32 dimension); tsne stands for that we compute KL divergence using the tsne coordinate(2 dimension);

DESC–scaling by group macaque_id

resolution_use="0.1"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
dataset="desc_results_monkeyRetina_macaque_id"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$sample=NULL
df_desc$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$desc$df_desc[["macaque_id"]]=df_desc
df0$desc$dr_desc[["macaque_id"]]=py_to_r(adata$obsm[[obsm_use]]) #just for KL
if (! "macaque_id" %in% names(KL_list$desc)){
tmp=data.frame(macaque_id=BatchKL(df_desc,df0$desc$dr_desc[["macaque_id"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df_desc,df0$desc$dr_desc[["macaque_id"]],n_cells=100,batch="sample"),
               region=BatchKL(df_desc,df0$desc$dr_desc[["macaque_id"]],n_cells=100,batch="region"))
KL_list$desc[["macaque_id"]]=tmp
KL_list$desc[['macaque_id']]$Method="DESC"
KL_list$desc[['macaque_id']]$Group="macaque_id"
}
df_desc[,desc_use]=factor(as.numeric(as.character(df_desc[,desc_use])))
ari=mclust::adjustedRandIndex(df_desc[,'cluster'],df_desc[,desc_use])
print(ari)
pdesc_m_1=getplot(df_desc,by.group = desc_use,ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pdesc_m_2=getplot(df_desc,by.group = "cluster",ggtitle0 = "Celltype",pt.size = 0.2)
pdesc_m_3=getplot(df_desc,by.group = "macaque_id",ggtitle0 = "macaqueID",pt.size = 0.2)
pdesc_m_4=getplot(df_desc,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pdesc_m_5=getplot(df_desc,by.group = "sample",ggtitle0 = "sampleID",pt.size = 0.2)
#p0=egg::ggarrange(pdesc_m_1,pdesc_m_2,pdesc_m_5,pdesc_m_4,pdesc_m_3,p_KL,ncol=3,draw = F)

DESC–scaling by group sample

resolution_use="0.3"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
dataset="desc_results_monkeyRetina_sample/"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# column
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$sample=NULL
df_desc$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$desc$df_desc[["sample"]]=df_desc
df0$desc$dr_desc[["sample"]]=py_to_r(adata$obsm[[obsm_use]]) #just for KL
if (! "sample" %in% names(KL_list$desc)){
tmp=data.frame(macaque_id=BatchKL(df_desc,df0$desc$dr_desc[["sample"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df_desc,df0$desc$dr_desc[["sample"]],n_cells=100,batch="sample"),
               region=BatchKL(df_desc,df0$desc$dr_desc[["sample"]],n_cells=100,batch="region"))
KL_list$desc[["sample"]]=tmp
KL_list$desc[['sample']]$Method="DESC"
KL_list$desc[['sample']]$Group="sample"
}
df_desc[,desc_use]=factor(as.numeric(as.character(df_desc[,desc_use])))
ari=mclust::adjustedRandIndex(df_desc[,'cluster'],df_desc[,desc_use])
print(ari)
pdesc_s_1=getplot(df_desc,by.group = desc_use,ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pdesc_s_2=getplot(df_desc,by.group = "cluster",ggtitle0 = "Celltype",pt.size = 0.2)
pdesc_s_3=getplot(df_desc,by.group = "macaque_id",ggtitle0 = "macaqueID",pt.size = 0.2)
pdesc_s_4=getplot(df_desc,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pdesc_s_5=getplot(df_desc,by.group = "sample",ggtitle0 = "sampleID",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

DESC–scaling by group region

resolution_use="0.1"
desc_use=paste0("desc_",resolution_use)
tsne_use=paste0("tsne",resolution_use)
obsm_use=paste0("X_Embeded_z",resolution_use)
dataset="desc_results_monkeyRetina_region/"
#dataset="bc0502"
filename=paste0(datadirpath,"/",dataset,"/adata_desc.h5ad")
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# column
#obj=Convert_from_anndata_to_seurat(adata,raw.X.slot="count.data")
df_desc=getplotdata(adata,reduction.use = tsne_use)
df_desc$sample=NULL
df_desc$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$desc$df_desc[["region"]]=df_desc
df0$desc$dr_desc[["region"]]=py_to_r(adata$obsm[[obsm_use]]) #just for KL
if (! "region" %in% names(KL_list$desc)){
tmp=data.frame(macaque_id=BatchKL(df_desc,df0$desc$dr_desc[["region"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df_desc,df0$desc$dr_desc[["region"]],n_cells=100,batch="sample"),
               region=BatchKL(df_desc,df0$desc$dr_desc[["region"]],n_cells=100,batch="region"))
KL_list$desc[["region"]]=tmp
KL_list$desc[['region']]$Method="DESC"
KL_list$desc[['region']]$Group="region"
}
df_desc[,desc_use]=factor(as.numeric(as.character(df_desc[,desc_use])))
ari=mclust::adjustedRandIndex(df_desc[,'cluster'],df_desc[,desc_use])
print(ari)
pdesc_r_1=getplot(df_desc,by.group = desc_use,ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pdesc_r_2=getplot(df_desc,by.group = "cluster",ggtitle0 = "Celltype",pt.size = 0.2)
pdesc_r_3=getplot(df_desc,by.group = "macaque_id",ggtitle0 = "macaqueID",pt.size = 0.2)
pdesc_r_4=getplot(df_desc,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pdesc_r_5=getplot(df_desc,by.group = "sample",ggtitle0 = "sampleID",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

CCA3.0

macaque_id, top 2000

batch_id="macaque_id"
detach("package:Seurat",unload = T)
library("Seurat",lib.loc = "/usr/lib/R/self_library/")
filename=paste0(datadirpath,"/cca3.0_result/","obj_cca3.0_",batch_id,"_2000.rds")
obj=readRDS(filename)
df_cca3.0=cbind(obj[["tsne"]]@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca3.0$df_cca3.0[["macaque_id"]]=df_cca3.0
df0$cca3.0$dr_cca3.0[["macaque_id"]]=obj[["pca"]]@cell.embeddings[,1:20]
detach("package:Seurat",unload = T)
library("Seurat")
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
#getplot(df_cca3.0,by.group = "sample",pt.size = 0.1)
cca3.0_use="macaque_idtop2000"
if (! "macaque_id" %in% names(KL_list$cca3.0)){
tmp=data.frame(
               macaque_id=BatchKL(df0$cca3.0$df_cca3.0[["macaque_id"]],df0$cca3.0$dr_cca3.0[["macaque_id"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$cca3.0$df_cca3.0[["macaque_id"]],df0$cca3.0$dr_cca3.0[["macaque_id"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$cca3.0$df_cca3.0[["macaque_id"]],df0$cca3.0$dr_cca3.0[["macaque_id"]],n_cells=100,batch="region"))
KL_list$cca3.0[["macaque_id"]]=tmp
KL_list$cca3.0[['macaque_id']]$Method="Seurat3.0"
KL_list$cca3.0[['macaque_id']]$Group="macaque_id"
}
df_cca3.0[,"integrated_snn_res.0.1"]=factor(as.numeric(as.character(df_cca3.0[,"integrated_snn_res.0.1"])))
df_cca3.0[,"res"]=df_cca3.0[,"integrated_snn_res.0.1"]
df0$cca3.0$df_cca3.0[["macaque_id"]]$res=df_cca3.0$res
ari=mclust::adjustedRandIndex(df_cca3.0[,'cluster'],df_cca3.0[,"res"])
pcca3.0_m_1=getplot(df_cca3.0,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca3.0_m_2=getplot(df_cca3.0,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca3.0_m_3=getplot(df_cca3.0,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca3.0_m_4=getplot(df_cca3.0,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca3.0_m_5=getplot(df_cca3.0,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

sampleID, top 2000

batch_id="sample"
detach("package:Seurat",unload = T)
library("Seurat",lib.loc = "/usr/lib/R/self_library/")
filename=paste0(datadirpath,"/cca3.0_result/","obj_cca3.0_",batch_id,"_2000.rds")
obj=readRDS(filename)
df_cca3.0=cbind(obj[["tsne"]]@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca3.0$df_cca3.0[[batch_id]]=df_cca3.0
df0$cca3.0$dr_cca3.0[[batch_id]]=obj[["pca"]]@cell.embeddings[,1:20]
detach("package:Seurat",unload = T)
library("Seurat")
cca3.0_use="sampletop2000"
if (! "sample" %in% names(KL_list$cca3.0)){
tmp=data.frame(
               macaque_id=BatchKL(df0$cca3.0$df_cca3.0[["sample"]],df0$cca3.0$dr_cca3.0[["sample"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$cca3.0$df_cca3.0[["sample"]],df0$cca3.0$dr_cca3.0[["sample"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$cca3.0$df_cca3.0[["sample"]],df0$cca3.0$dr_cca3.0[["sample"]],n_cells=100,batch="region"))
KL_list$cca3.0[["sample"]]=tmp
KL_list$cca3.0[['sample']]$Method="Seurat3.0"
KL_list$cca3.0[['sample']]$Group="sample"
}
df_cca3.0[,"integrated_snn_res.0.1"]=factor(as.numeric(as.character(df_cca3.0[,"integrated_snn_res.0.1"])))
df_cca3.0[,"res"]=df_cca3.0[,"integrated_snn_res.0.1"]
df0$cca3.0$df_cca3.0[["macaque_id"]]$res=df_cca3.0$res
ari=mclust::adjustedRandIndex(df_cca3.0[,'cluster'],df_cca3.0[,"res"])
pcca3.0_s_1=getplot(df_cca3.0,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca3.0_s_2=getplot(df_cca3.0,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca3.0_s_3=getplot(df_cca3.0,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca3.0_s_4=getplot(df_cca3.0,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca3.0_s_5=getplot(df_cca3.0,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

region, top 2000

batch_id="region"
detach("package:Seurat",unload = T)
library("Seurat",lib.loc = "/usr/lib/R/self_library/")
filename=paste0(datadirpath,"/cca3.0_result/","obj_cca3.0_",batch_id,"_2000.rds")
obj=readRDS(filename)
df_cca3.0=cbind(obj[["tsne"]]@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca3.0$df_cca3.0[["region"]]=df_cca3.0
df0$cca3.0$dr_cca3.0[["region"]]=obj[["pca"]]@cell.embeddings[,1:20]
detach("package:Seurat",unload = T)
library("Seurat")
cca3.0_use="regiontop2000"
if (! "region" %in% names(KL_list$cca3.0)){
tmp=data.frame(
               macaque_id=BatchKL(df0$cca3.0$df_cca3.0[["region"]],df0$cca3.0$dr_cca3.0[["region"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$cca3.0$df_cca3.0[["region"]],df0$cca3.0$dr_cca3.0[["region"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$cca3.0$df_cca3.0[["region"]],df0$cca3.0$dr_cca3.0[["region"]],n_cells=100,batch="region"))
KL_list$cca3.0[["region"]]=tmp
KL_list$cca3.0[['region']]$Method="Seurat3.0"
KL_list$cca3.0[['region']]$Group="region"
}
df_cca3.0[,"integrated_snn_res.0.01"]=factor(as.numeric(as.character(df_cca3.0[,"integrated_snn_res.0.01"])))
df_cca3.0[,"res"]=df_cca3.0[,"integrated_snn_res.0.01"]
df0$cca3.0$df_cca3.0[["macaque_id"]]$res=df_cca3.0$res
ari=mclust::adjustedRandIndex(df_cca3.0[,'cluster'],df_cca3.0[,"res"])
pcca3.0_r_1=getplot(df_cca3.0,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca3.0_r_2=getplot(df_cca3.0,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca3.0_r_3=getplot(df_cca3.0,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca3.0_r_4=getplot(df_cca3.0,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca3.0_r_5=getplot(df_cca3.0,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

Note The minumum resolution I used is 0.2, so the number of clusters is larger than the number of true celltype.

CCA

macaque_id, top 2000

batch_id="macaque_id"
filename=paste0(datadirpath,"/cca_result/","obj_cca_",batch_id,"_2000_False.rds")
obj=readRDS(filename)
df_cca=cbind(obj@dr$tsne@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca$df_cca[["macaque_id"]]=df_cca
df0$cca$dr_cca[["macaque_id"]]=obj@dr$cca@cell.embeddings
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
cca_use="macaque_idtop2000"
if (! "macaque_id" %in% names(KL_list$cca)){
tmp=data.frame(
               macaque_id=BatchKL(df0$cca$df_cca[["macaque_id"]],df0$cca$dr_cca[["macaque_id"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$cca$df_cca[["macaque_id"]],df0$cca$dr_cca[["macaque_id"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$cca$df_cca[["macaque_id"]],df0$cca$dr_cca[["macaque_id"]],n_cells=100,batch="region"))
KL_list$cca[["macaque_id"]]=tmp
KL_list$cca[['macaque_id']]$Method="CCA"
KL_list$cca[['macaque_id']]$Group="macaque_id"
}
df_cca[,"res.0.1"]=factor(as.numeric(as.character(df_cca[,"res.0.1"])))
df_cca[,"res"]=df_cca[,"res.0.1"]
df0$cca$df_cca[["macaque_id"]]$res=df_cca$res
ari=mclust::adjustedRandIndex(df_cca[,'cluster'],df_cca[,"res.0.1"])
pcca_m_1=getplot(df_cca,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca_m_2=getplot(df_cca,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca_m_3=getplot(df_cca,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca_m_4=getplot(df_cca,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca_m_5=getplot(df_cca,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

sampleID, top 2000

batch_id="sample"
filename=paste0(datadirpath,"/cca_result/","obj_cca_",batch_id,"_2000_False.rds")
obj=readRDS(filename)
df_cca=cbind(obj@dr$tsne@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca$df_cca[["sample"]]=df_cca
df0$cca$dr_cca[["sample"]]=obj@dr$cca@cell.embeddings
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
cca_use="sampletop2000"
if (! "sample" %in% names(KL_list$cca)){
tmp=data.frame(
               macaque_id=BatchKL(df0$cca$df_cca[["sample"]],df0$cca$dr_cca[["sample"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$cca$df_cca[["sample"]],df0$cca$dr_cca[["sample"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$cca$df_cca[["sample"]],df0$cca$dr_cca[["sample"]],n_cells=100,batch="region"))
KL_list$cca[["sample"]]=tmp
KL_list$cca[['sample']]$Method="CCA"
KL_list$cca[['sample']]$Group="sample"
}
df_cca[,"res.0.1"]=factor(as.numeric(as.character(df_cca[,"res.0.1"])))
df_cca[,"res"]=df_cca[,"res.0.1"]
df0$cca$df_cca[["sample"]]$res=df_cca$res
ari=mclust::adjustedRandIndex(df_cca[,'cluster'],df_cca[,"res"])
pcca_s_1=getplot(df_cca,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca_s_2=getplot(df_cca,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca_s_3=getplot(df_cca,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca_s_4=getplot(df_cca,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca_s_5=getplot(df_cca,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

region, top 2000

batch_id="region"
filename=paste0(datadirpath,"/cca_result/","obj_cca_",batch_id,"_2000_False.rds")
obj=readRDS(filename)
df_cca=cbind(obj@dr$tsne@cell.embeddings[,c(1,2)],obj@meta.data)
df0$cca$df_cca[["region"]]=df_cca
df0$cca$dr_cca[["region"]]=obj@dr$cca@cell.embeddings
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
cca_use="regiontop2000"
if (! "region" %in% names(KL_list$cca)){
tmp=data.frame(
               macaque_id=BatchKL(df0$cca$df_cca[["region"]],df0$cca$dr_cca[["region"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$cca$df_cca[["region"]],df0$cca$dr_cca[["region"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$cca$df_cca[["region"]],df0$cca$dr_cca[["region"]],n_cells=100,batch="region"))
KL_list$cca[["region"]]=tmp
KL_list$cca[['region']]$Method="CCA"
KL_list$cca[['region']]$Group="region"
}
df_cca[,"res.0.2"]=factor(as.numeric(as.character(df_cca[,"res.0.2"])))
df_cca[,"res"]=df_cca[,"res.0.2"]
df0$cca$df_cca[["region"]]$res=df_cca$res
ari=mclust::adjustedRandIndex(df_cca[,'cluster'],df_cca[,"res"])
pcca_r_1=getplot(df_cca,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pcca_r_2=getplot(df_cca,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pcca_r_3=getplot(df_cca,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pcca_r_4=getplot(df_cca,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pcca_r_5=getplot(df_cca,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

MNN

macaque_id, top 2000

batch_id="macaque_id"
filename=paste0(datadirpath,"/mnn_result/","adata_mnn_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df_mnn=getplotdata(adata,reduction.use = "tsne")
df_mnn$sample=NULL
df_mnn$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$mnn$df_mnn[["macaque_id"]]=df_mnn
df0$mnn$dr_mnn[["macaque_id"]]=py_to_r(adata$obsm["X_pca"])[,c(1:20)]
if (! "macaque_id" %in% names(KL_list$mnn)){
tmp=data.frame(
               macaque_id=BatchKL(df0$mnn$df_mnn[["macaque_id"]],df0$mnn$dr_mnn[["macaque_id"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$mnn$df_mnn[["macaque_id"]],df0$mnn$dr_mnn[["macaque_id"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$mnn$df_mnn[["macaque_id"]],df0$mnn$dr_mnn[["macaque_id"]],n_cells=100,batch="region"))
KL_list$mnn[["macaque_id"]]=tmp
KL_list$mnn[['macaque_id']]$Method="MNN"
KL_list$mnn[['macaque_id']]$Group="macaque_id"
}
df_mnn[,"res.0.1"]=factor(as.numeric(as.character(df_mnn[,"louvain_ori0.1"])))
df_mnn[,"res"]=df_mnn[,"res.0.1"]
df0$mnn$df_mnn[["macaque_id"]]$res=df_mnn$res
ari=mclust::adjustedRandIndex(df_mnn[,'cluster'],df_mnn[,"res"])
pmnn_m_1=getplot(df_mnn,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pmnn_m_2=getplot(df_mnn,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pmnn_m_3=getplot(df_mnn,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pmnn_m_4=getplot(df_mnn,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pmnn_m_5=getplot(df_mnn,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

sample, top 2000

batch_id="sample"
filename=paste0(datadirpath,"/mnn_result/","adata_mnn_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df_mnn=getplotdata(adata,reduction.use = "tsne")
df_mnn$sample=NULL
df_mnn$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$mnn$df_mnn[["sample"]]=df_mnn
df0$mnn$dr_mnn[["sample"]]=py_to_r(adata$obsm["X_pca"])[,c(1:20)]
if (! "sample" %in% names(KL_list$mnn)){
tmp=data.frame(
               macaque_id=BatchKL(df0$mnn$df_mnn[["sample"]],df0$cca$dr_mnn[["sample"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$mnn$df_mnn[["sample"]],df0$mnn$dr_mnn[["sample"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$mnn$df_mnn[["sample"]],df0$mnn$dr_mnn[["sample"]],n_cells=100,batch="region"))
KL_list$mnn[["sample"]]=tmp
KL_list$mnn[['sample']]$Method="MNN"
KL_list$mnn[['sample']]$Group="sample"
}
df_mnn[,"res.0.4"]=factor(as.numeric(as.character(df_mnn[,"louvain_ori0.4"])))
df_mnn[,"res"]=df_mnn[,"res.0.4"]
df0$cca$df_mnn[["sample"]]$res=df_mnn$res
ari=mclust::adjustedRandIndex(df_mnn[,'cluster'],df_mnn[,"res"])
pmnn_s_1=getplot(df_mnn,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pmnn_s_2=getplot(df_mnn,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pmnn_s_3=getplot(df_mnn,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pmnn_s_4=getplot(df_mnn,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pmnn_s_5=getplot(df_mnn,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

region, top 2000

batch_id="region"
filename=paste0(datadirpath,"/mnn_result/","adata_mnn_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df_mnn=getplotdata(adata,reduction.use = "tsne")
df_mnn$sample=NULL
df_mnn$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$mnn$df_mnn[["region"]]=df_mnn
df0$mnn$dr_mnn[["region"]]=py_to_r(adata$obsm["X_pca"])[,c(1:20)]
if (! "region" %in% names(KL_list$mnn)){
tmp=data.frame(
               macaque_id=BatchKL(df0$mnn$df_mnn[["region"]],df0$cca$dr_mnn[["region"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$mnn$df_mnn[["region"]],df0$mnn$dr_mnn[["region"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$mnn$df_mnn[["region"]],df0$mnn$dr_mnn[["region"]],n_cells=100,batch="region"))
KL_list$mnn[["region"]]=tmp
KL_list$mnn[['region']]$Method="MNN"
KL_list$mnn[['region']]$Group="region"
}
df_mnn[,"res.0.1"]=factor(as.numeric(as.character(df_mnn[,"louvain_ori0.1"])))
df_mnn[,"res"]=df_mnn[,"res.0.1"]
df0$cca$df_mnn[["sample"]]$res=df_mnn$res
ari=mclust::adjustedRandIndex(df_mnn[,'cluster'],df_mnn[,"res"])
pmnn_r_1=getplot(df_mnn,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pmnn_r_2=getplot(df_mnn,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pmnn_r_3=getplot(df_mnn,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pmnn_r_4=getplot(df_mnn,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pmnn_r_5=getplot(df_mnn,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

scVI

scVI-scaling across all cells (top 2000 genes)

batch_id="nobatch"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata,reduction.use = "tsne")
df_scvi$sample=NULL
df_scvi$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$scvi$df_scvi[["all"]]=df_scvi
df0$scvi$dr_scvi[["all"]]=py_to_r(adata$X)
if (! "all" %in% names(KL_list$scvi)){
tmp=data.frame(
               macaque_id=BatchKL(df0$scvi$df_scvi[["all"]],df0$scvi$dr_scvi[["all"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$scvi$df_scvi[["all"]],df0$scvi$dr_scvi[["all"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$scvi$df_scvi[["all"]],df0$scvi$dr_scvi[["all"]],n_cells=100,batch="region"))
KL_list$scvi[["all"]]=tmp
KL_list$scvi[['all']]$Method="scVI"
KL_list$scvi[['all']]$Group="All"
}
df_scvi[,"res"]=factor(as.numeric(as.character(df_scvi[,"louvain_ori0.05"])))
df0$scvi$df_scvi[["macaque_id"]]$res=df_scvi$res
ari=mclust::adjustedRandIndex(df_scvi[,'cluster'],df_scvi[,"res"])
pscvi_1=getplot(df_scvi,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscvi_2=getplot(df_scvi,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscvi_3=getplot(df_scvi,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscvi_4=getplot(df_scvi,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscvi_5=getplot(df_scvi,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

macaque_id, top 2000

batch_id="macaque_id"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata,reduction.use = "tsne")
df_scvi$sample=NULL
df_scvi$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$scvi$df_scvi[["macaque_id"]]=df_scvi
df0$scvi$dr_scvi[["macaque_id"]]=py_to_r(adata$X)
if (! "macaque_id" %in% names(KL_list$scvi)){
tmp=data.frame(
               macaque_id=BatchKL(df0$scvi$df_scvi[["macaque_id"]],df0$scvi$dr_scvi[["macaque_id"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$scvi$df_scvi[["macaque_id"]],df0$scvi$dr_scvi[["macaque_id"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$scvi$df_scvi[["macaque_id"]],df0$scvi$dr_scvi[["macaque_id"]],n_cells=100,batch="region"))
KL_list$scvi[["macaque_id"]]=tmp
KL_list$scvi[['macaque_id']]$Method="scVI"
KL_list$scvi[['macaque_id']]$Group="macaque_id"
}
df_scvi[,"res"]=factor(as.numeric(as.character(df_scvi[,"louvain_ori0.2"])))
df0$scvi$df_scvi[["macaque_id"]]$res=df_scvi$res
ari=mclust::adjustedRandIndex(df_scvi[,'cluster'],df_scvi[,"res"])
pscvi_m_1=getplot(df_scvi,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscvi_m_2=getplot(df_scvi,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscvi_m_3=getplot(df_scvi,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscvi_m_4=getplot(df_scvi,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscvi_m_5=getplot(df_scvi,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

sample, top 2000

batch_id="sample"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata,reduction.use = "tsne")
df_scvi$sample=NULL
df_scvi$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$scvi$df_scvi[["sample"]]=df_scvi
df0$scvi$dr_scvi[["sample"]]=py_to_r(adata$X)
if (! "sample" %in% names(KL_list$scvi)){
tmp=data.frame(
               macaque_id=BatchKL(df0$scvi$df_scvi[["sample"]],df0$scvi$dr_scvi[["sample"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$scvi$df_scvi[["sample"]],df0$scvi$dr_scvi[["sample"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$scvi$df_scvi[["sample"]],df0$scvi$dr_scvi[["sample"]],n_cells=100,batch="region"))
KL_list$scvi[["sample"]]=tmp
KL_list$scvi[['sample']]$Method="scVI"
KL_list$scvi[['sample']]$Group="sample"
}
df_scvi[,"res"]=factor(as.numeric(as.character(df_scvi[,"louvain_ori0.4"])))
df0$scvi$df_scvi[["sample"]]$res=df_scvi$res
ari=mclust::adjustedRandIndex(df_scvi[,'cluster'],df_scvi[,"res"])
pscvi_s_1=getplot(df_scvi,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscvi_s_2=getplot(df_scvi,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscvi_s_3=getplot(df_scvi,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscvi_s_4=getplot(df_scvi,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscvi_s_5=getplot(df_scvi,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

region, top 2000

batch_id="region"
filename=paste0(datadirpath,"/scvi_result/","adata_scvi_",batch_id,"_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata$obs[["sampleID"]]=adata$obs["sample"]$astype("str")
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scvi=getplotdata(adata,reduction.use = "tsne")
df_scvi$sample=NULL
df_scvi$sample=py_to_r(adata$obs["sample"]$astype("str"))
df0$scvi$df_scvi[["region"]]=df_scvi
df0$scvi$dr_scvi[["region"]]=py_to_r(adata$X)
if (! "region" %in% names(KL_list$scvi)){
tmp=data.frame(
               macaque_id=BatchKL(df0$scvi$df_scvi[["region"]],df0$scvi$dr_scvi[["region"]],n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$scvi$df_scvi[["region"]],df0$scvi$dr_scvi[["region"]],n_cells=100,batch="sample"),
               region=BatchKL(df0$scvi$df_scvi[["region"]],df0$scvi$dr_scvi[["region"]],n_cells=100,batch="region"))
KL_list$scvi[["region"]]=tmp
KL_list$scvi[['region']]$Method="scVI"
KL_list$scvi[['region']]$Group="region"
}
df_scvi[,"res"]=factor(as.numeric(as.character(df_scvi[,"louvain_ori0.1"])))
df0$scvi$df_scvi[["sample"]]$res=df_scvi$res
ari=mclust::adjustedRandIndex(df_scvi[,'cluster'],df_scvi[,"res"])
pscvi_r_1=getplot(df_scvi,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscvi_r_2=getplot(df_scvi,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscvi_r_3=getplot(df_scvi,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscvi_r_4=getplot(df_scvi,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscvi_r_5=getplot(df_scvi,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

BERMUDA

BERMUDA0.90

macaque_id

batch_id="macaque_id"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.9_macaque_id.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
df0$BERMUDA0.90$df_bermuda[["macaque_id"]]=df_bermuda
df0$BERMUDA0.90$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "macaque_id" %in% names(KL_list$BERMUDA0.90)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.90$df_bermuda[["macaque_id"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$BERMUDA0.90$df_bermuda[["macaque_id"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="sample"),
               region=BatchKL(df0$BERMUDA0.90$df_bermuda[["macaque_id"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.90[["macaque_id"]]=tmp
KL_list$BERMUDA0.90[['macaque_id']]$Method="BERMUDA"
KL_list$BERMUDA0.90[['macaque_id']]$Group="macaque_id"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.5"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.5"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.5"]
#df_bermuda0.90=df_bermuda
df0$BERMUDA$df_bermuda[["macaque_id"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.90_m_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.90_m_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.90_m_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.90_m_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.90_m_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(pbermuda0.90_m_1,pbermuda0.90_m_2,pbermuda0.90_m_3,pbermuda0.90_m_4,pbermuda0.90_m_5,ncol=3,draw = F)

sample

batch_id="sample"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.9_sample.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
#df_bermuda$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$BERMUDA0.90$df_bermuda[["sample"]]=df_bermuda
df0$BERMUDA0.90$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "sample" %in% names(KL_list$BERMUDA0.90)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.90$df_bermuda[["sample"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$BERMUDA0.90$df_bermuda[["sample"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="sample"),
               region=BatchKL(df0$BERMUDA0.90$df_bermuda[["sample"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.90[["sample"]]=tmp
KL_list$BERMUDA0.90[['sample']]$Method="BERMUDA"
KL_list$BERMUDA0.90[['sample']]$Group="sample"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.18"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.18"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.18"]
#df_bermuda0.90=df_bermuda
df0$BERMUDA$df_bermuda[["sample"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.90_s_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.90_s_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.90_s_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.90_s_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.90_s_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

region

batch_id="region"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.9_region.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
#df_bermuda$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$BERMUDA0.90$df_bermuda[["region"]]=df_bermuda
df0$BERMUDA0.90$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "region" %in% names(KL_list$BERMUDA0.90)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.90$df_bermuda[["region"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$BERMUDA0.90$df_bermuda[["region"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="sample"),
               region=BatchKL(df0$BERMUDA0.90$df_bermuda[["region"]],df0$BERMUDA0.90$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.90[["region"]]=tmp
KL_list$BERMUDA0.90[['region']]$Method="BERMUDA"
KL_list$BERMUDA0.90[['region']]$Group="region"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.05"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.05"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.05"]
#df_bermuda0.90=df_bermuda
df0$BERMUDA$df_bermuda[["region"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.90_r_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.90_r_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.90_r_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.90_r_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.90_r_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

BERMUDA0.85

macaque_id

batch_id="macaque_id"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.85_macaque_id.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
df0$BERMUDA0.85$df_bermuda[["macaque_id"]]=df_bermuda
df0$BERMUDA0.85$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "macaque_id" %in% names(KL_list$BERMUDA0.85)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.85$df_bermuda[["macaque_id"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$BERMUDA0.85$df_bermuda[["macaque_id"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="sample"),
               region=BatchKL(df0$BERMUDA0.85$df_bermuda[["macaque_id"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.85[["macaque_id"]]=tmp
KL_list$BERMUDA0.85[['macaque_id']]$Method="BERMUDA0.85"
KL_list$BERMUDA0.85[['macaque_id']]$Group="macaque_id"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.1"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.1"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.1"]
#df_bermuda0.85=df_bermuda
df0$BERMUDA$df_bermuda[["macaque_id"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.85_m_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.85_m_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.85_m_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.85_m_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.85_m_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(pbermuda0.85_m_1,pbermuda0.85_m_2,pbermuda0.85_m_3,pbermuda0.85_m_4,pbermuda0.85_m_5,ncol=3,draw = F)

sample

batch_id="sample"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.85_sample.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
#df_bermuda$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$BERMUDA0.85$df_bermuda[["sample"]]=df_bermuda
df0$BERMUDA0.85$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "sample" %in% names(KL_list$BERMUDA0.85)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.85$df_bermuda[["sample"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$BERMUDA0.85$df_bermuda[["sample"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="sample"),
               region=BatchKL(df0$BERMUDA0.85$df_bermuda[["sample"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.85[["sample"]]=tmp
KL_list$BERMUDA0.85[['sample']]$Method="BERMUDA0.85"
KL_list$BERMUDA0.85[['sample']]$Group="sample"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.011"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.011"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.011"]
#df_bermuda0.85=df_bermuda
df0$BERMUDA$df_bermuda[["sample"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.85_s_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.85_s_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.85_s_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.85_s_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.85_s_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

region

batch_id="region"
filename=paste0(datadirpath,"/BERMUDA_result/adata_bermuda0.85_region.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_bermuda=getplotdata(adata,reduction.use = "tsne")%>%left_join(df_desc[,c("cellname","macaque_id","sample","region","cluster")],by="cellname")
#df_bermuda$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$BERMUDA0.85$df_bermuda[["region"]]=df_bermuda
df0$BERMUDA0.85$dr_bermuda=py_to_r(adata$obsm["X_bermuda"])[,c(1:20)]
if (! "region" %in% names(KL_list$BERMUDA0.85)){
tmp=data.frame(macaque_id=BatchKL(df0$BERMUDA0.85$df_bermuda[["region"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$BERMUDA0.85$df_bermuda[["region"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="sample"),
               region=BatchKL(df0$BERMUDA0.85$df_bermuda[["region"]],df0$BERMUDA0.85$dr_bermuda,n_cells=100,batch="region"))
KL_list$BERMUDA0.85[["region"]]=tmp
KL_list$BERMUDA0.85[['region']]$Method="BERMUDA0.85"
KL_list$BERMUDA0.85[['region']]$Group="region"
}
id0=match(rownames(df_desc),df_bermuda$cellname)
df_bermuda[,"louvain_0.2"]=factor(as.numeric(as.character(df_bermuda[,"louvain_0.2"])))
df_bermuda[,"res"]=df_bermuda[,"louvain_0.2"]
#df_bermuda0.85=df_bermuda
df0$BERMUDA$df_bermuda[["region"]]$res=df_bermuda$res
ari=mclust::adjustedRandIndex(df_bermuda[,'cluster'],df_bermuda[,"res"])
pbermuda0.85_r_1=getplot(df_bermuda,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pbermuda0.85_r_2=getplot(df_bermuda,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pbermuda0.85_r_3=getplot(df_bermuda,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pbermuda0.85_r_4=getplot(df_bermuda,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pbermuda0.85_r_5=getplot(df_bermuda,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

scanorama

macaque_id

batch_id="macaque_id"
filename=paste0(datadirpath,"/scanorama_result/adata_scanorama_macaque_id_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scanorama=getplotdata(adata,reduction.use = "tsne")
df_scanorama$sample=NULL
df_scanorama$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$scanorama$df_scanorama[["macaque_id"]]=df_scanorama
df0$scanorama$dr_scanorama=py_to_r(adata$obsm["X_scanorama"])[,c(1:100)]
if (! "macaque_id" %in% names(KL_list$scanorama)){
tmp=data.frame(macaque_id=BatchKL(df0$scanorama$df_scanorama[["macaque_id"]],df0$scanorama$dr_scanorama,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$scanorama$df_scanorama[["macaque_id"]],df0$scanorama$dr_scanorama,n_cells=100,batch="sample"),
               region=BatchKL(df0$scanorama$df_scanorama[["macaque_id"]],df0$scanorama$dr_scanorama,n_cells=100,batch="region"))
KL_list$scanorama[["macaque_id"]]=tmp
KL_list$scanorama[['macaque_id']]$Method="scanorama"
KL_list$scanorama[['macaque_id']]$Group="macaque_id"
}
id0=match(rownames(df_desc),df_scanorama$cellname)
df_scanorama[,"louvain_0.6"]=factor(as.numeric(as.character(df_scanorama[,"louvain_0.6"])))
df_scanorama[,"res"]=df_scanorama[,"louvain_0.6"]
df_scanorama=df_scanorama
df0$scanorama$df_scanorama[["macaque_id"]]$res=df_scanorama$res
ari=mclust::adjustedRandIndex(df_scanorama[,'cluster'],df_scanorama[,"res"])
pscanorama_m_1=getplot(df_scanorama,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscanorama_m_2=getplot(df_scanorama,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscanorama_m_3=getplot(df_scanorama,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscanorama_m_4=getplot(df_scanorama,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscanorama_m_5=getplot(df_scanorama,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

sample

batch_id="sample"
filename=paste0(datadirpath,"/scanorama_result/adata_scanorama_sample_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scanorama=getplotdata(adata,reduction.use = "tsne")
df_scanorama$sample=NULL
df_scanorama$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$scanorama$df_scanorama[["sample"]]=df_scanorama
df0$scanorama$dr_scanorama=py_to_r(adata$obsm["X_scanorama"])[,c(1:100)]
if (! "sample" %in% names(KL_list$scanorama)){
tmp=data.frame(macaque_id=BatchKL(df0$scanorama$df_scanorama[["sample"]],df0$scanorama$dr_scanorama,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$scanorama$df_scanorama[["sample"]],df0$scanorama$dr_scanorama,n_cells=100,batch="sample"),
               region=BatchKL(df0$scanorama$df_scanorama[["sample"]],df0$scanorama$dr_scanorama,n_cells=100,batch="region"))
KL_list$scanorama[["macaque_id"]]=tmp
KL_list$scanorama[['macaque_id']]$Method="scanorama"
KL_list$scanorama[['macaque_id']]$Group="sample"
}
id0=match(rownames(df_desc),df_scanorama$cellname)
df_scanorama[,"louvain_0.6"]=factor(as.numeric(as.character(df_scanorama[,"louvain_0.6"])))
df_scanorama[,"res"]=df_scanorama[,"louvain_0.6"]
df_scanorama=df_scanorama
df0$scanorama$df_scanorama[["sample"]]$res=df_scanorama$res
ari=mclust::adjustedRandIndex(df_scanorama[,'cluster'],df_scanorama[,"res"])
pscanorama_s_1=getplot(df_scanorama,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscanorama_s_2=getplot(df_scanorama,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscanorama_s_3=getplot(df_scanorama,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscanorama_s_4=getplot(df_scanorama,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscanorama_s_5=getplot(df_scanorama,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
#p0=egg::ggarrange(p1,p2,p5,p4,p3,p_KL,ncol=3,draw = F)

region

batch_id="region"
filename=paste0(datadirpath,"/scanorama_result/adata_scanorama_region_2000_False.h5ad")
ad=import("anndata",convert = FALSE)
adata=ad$read_h5ad(filename)
adata=change_obs(adata,exclude=c("n_genes","n_counts","sample"))# colum
df_scanorama=getplotdata(adata,reduction.use = "tsne")
df_scanorama$sample=NULL
df_scanorama$sample=py_to_r(adata$obs["sample"]$astype("str"))
#print(mclust::adjustedRandIndex(df_cca$cluster,df_cca$integrated_snn_res.0.2))
df0$scanorama$df_scanorama[["region"]]=df_scanorama
df0$scanorama$dr_scanorama=py_to_r(adata$obsm["X_scanorama"])[,c(1:100)]
if (! "region" %in% names(KL_list$scanorama)){
tmp=data.frame(macaque_id=BatchKL(df0$scanorama$df_scanorama[["region"]],df0$scanorama$dr_scanorama,n_cells=100,batch="macaque_id"),
               sample=BatchKL(df0$scanorama$df_scanorama[["region"]],df0$scanorama$dr_scanorama,n_cells=100,batch="sample"),
               region=BatchKL(df0$scanorama$df_scanorama[["region"]],df0$scanorama$dr_scanorama,n_cells=100,batch="region"))
KL_list$scanorama[["region"]]=tmp
KL_list$scanorama[['region']]$Method="scanorama"
KL_list$scanorama[['region']]$Group="region"
}
id0=match(rownames(df_desc),df_scanorama$cellname)
df_scanorama[,"louvain_0.4"]=factor(as.numeric(as.character(df_scanorama[,"louvain_0.4"])))
df_scanorama[,"res"]=df_scanorama[,"louvain_0.4"]
df_scanorama=df_scanorama
df0$scanorama$df_scanorama[["region"]]$res=df_scanorama$res
ari=mclust::adjustedRandIndex(df_scanorama[,'cluster'],df_scanorama[,"res"])
pscanorama_r_1=getplot(df_scanorama,by.group = "res",ggtitle0 = paste0("Cluster","(ARI=",round(ari,3),")"),pt.size = 0.2)+theme(plot.title = element_text(color="red"))
pscanorama_r_2=getplot(df_scanorama,by.group = "cluster",ggtitle0 = "celltype",pt.size = 0.2)
pscanorama_r_3=getplot(df_scanorama,by.group = "macaque_id",ggtitle0 = "macaque_id",pt.size = 0.2)
pscanorama_r_4=getplot(df_scanorama,by.group = "region",ggtitle0 = "region",pt.size = 0.2)
pscanorama_r_5=getplot(df_scanorama,by.group = "sample",ggtitle0 = "sample",pt.size = 0.2)
## save results for downstreaming plot, which only can be seen myself.
#save(KL_list,df0,file = "KL_list.RData")
#save.image(file="all.RData")

Figure 1

ari_value=c(mclust::adjustedRandIndex(df0$desc$df_desc$all$desc_0.1,df0$desc$df_desc$all$cluster),
                          mclust::adjustedRandIndex(df0$desc$df_desc$macaque_id$desc_0.1,df0$desc$df_desc$macaque_id$cluster),
                          mclust::adjustedRandIndex(df0$desc$df_desc$sample$desc_0.3,df0$desc$df_desc$sample$cluster),
                          mclust::adjustedRandIndex(df0$desc$df_desc$region$desc_0.1,df0$desc$df_desc$region$cluster),
                          #cca3.0
                          mclust::adjustedRandIndex(df0$cca3.0$df_cca3.0$macaque_id$integrated_snn_res.0.1,df0$cca3.0$df_cca3.0$macaque_id$cluster),
                          mclust::adjustedRandIndex(df0$cca3.0$df_cca3.0$sample$integrated_snn_res.0.3,df0$cca3.0$df_cca3.0$sample$cluster),
                          mclust::adjustedRandIndex(df0$cca3.0$df_cca3.0$region$integrated_snn_res.0.01,df0$cca3.0$df_cca3.0$region$cluster),
                          #cca
                          mclust::adjustedRandIndex(df0$cca$df_cca$macaque_id$res.0.1,df0$cca$df_cca$macaque_id$cluster),
                          mclust::adjustedRandIndex(df0$cca$df_cca$sample$res.0.1,df0$cca$df_cca$sample$cluster),
                          mclust::adjustedRandIndex(df0$cca$df_cca$region$res.0.2,df0$cca$df_cca$region$cluster),
                          #mnn
                          mclust::adjustedRandIndex(df0$mnn$df_mnn$macaque_id$louvain_ori0.1,df0$mnn$df_mnn$macaque_id$cluster),
                          mclust::adjustedRandIndex(df0$mnn$df_mnn$sample$louvain_ori0.4,df0$mnn$df_mnn$sample$cluster),
                          mclust::adjustedRandIndex(df0$mnn$df_mnn$region$louvain_ori0.1,df0$mnn$df_mnn$region$cluster),
                          #scvi
                          mclust::adjustedRandIndex(df0$scvi$df_scvi$all$louvain_ori0.05,df0$scvi$df_scvi$all$cluster),
                          mclust::adjustedRandIndex(df0$scvi$df_scvi$macaque_id$louvain_ori0.2,df0$scvi$df_scvi$macaque_id$cluster),
                          mclust::adjustedRandIndex(df0$scvi$df_scvi$sample$louvain_ori0.4,df0$scvi$df_scvi$sample$cluster),
                          mclust::adjustedRandIndex(df0$scvi$df_scvi$region$louvain_ori0.1,df0$scvi$df_scvi$region$cluster),
                          #BERMUDA0.90
                          mclust::adjustedRandIndex(df0$BERMUDA0.90$df_bermuda$macaque_id$louvain_0.5,df0$BERMUDA0.90$df_bermuda$macaque_id$cluster),
                          mclust::adjustedRandIndex(df0$BERMUDA0.90$df_bermuda$sample$louvain_0.18,df0$BERMUDA0.90$df_bermuda$sample$cluster),
                          mclust::adjustedRandIndex(df0$BERMUDA0.90$df_bermuda$region$louvain_0.05,df0$BERMUDA0.90$df_bermuda$region$cluster),
                          #BERMUDA0.85
                          mclust::adjustedRandIndex(df0$BERMUDA0.85$df_bermuda$macaque_id$louvain_0.1,df0$BERMUDA0.85$df_bermuda$macaque_id$cluster),
                          mclust::adjustedRandIndex(df0$BERMUDA0.85$df_bermuda$sample$louvain_0.011,df0$BERMUDA0.85$df_bermuda$sample$cluster),
                          mclust::adjustedRandIndex(df0$BERMUDA0.85$df_bermuda$region$louvain_0.2,df0$BERMUDA0.85$df_bermuda$region$cluster),
                          #scanorama
                          mclust::adjustedRandIndex(df0$scanorama$df_scanorama$macaque_id$louvain_0.6,df0$scanorama$df_scanorama$macaque_id$cluster),
                          mclust::adjustedRandIndex(df0$scanorama$df_scanorama$sample$louvain_0.6,df0$scanorama$df_scanorama$sample$cluster),
                          mclust::adjustedRandIndex(df0$scanorama$df_scanorama$region$louvain_0.4,df0$scanorama$df_scanorama$region$cluster)
)
#ARI
ARI_data=data.frame(ARI=ari_value,
                    UseBatchID=c("All",rep(c("macaque_id","sample","region"),times=4),
                                 c("All","macaque_id","sample","region"),rep(c("macaque_id","sample","region"),times=3)),
                    Method=c("DESC",rep(c("DESC","Seurat3.0","CCA","MNN"),each=3),rep("scVI",times=4),
                             rep("BERMUDA0.90",time=3),rep("BERMUDA0.85",times=3),rep("scanorama",times=3)))
ARI_data$Method=factor(ARI_data$Method,levels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA0.90","BERMUDA0.85","scanorama"))
                          
p_ari=ggplot(data=ARI_data,aes(x=factor(Method),y=ARI,fill=UseBatchID,width=0.85))+
  geom_bar(position= position_dodge(width = 0.8,preserve = "single"),stat="identity")+
  xlab("")+
  ylab("Adjusted Rand Index")+
  scale_y_continuous(expand = c(0,0),breaks=c(0,0.25,0.5,0.75,1.0),labels=c("0.0","0.25","0.50","0.75","1.00"),limits=c(0,1.02))+
  theme(axis.text.x = element_text(angle=60,hjust=1))+
  ggtitle("")+
  #scale_fill_manual(values=colors_use)+
  theme(legend.key.size = unit(0.6,"cm"))
#ARI
ARI_data=data.frame(ARI=ari_value,
                    UseBatchID=c("All",rep(c("macaque_id","sample","region"),times=4),
                                 c("All","macaque_id","sample","region"),rep(c("macaque_id","sample","region"),times=3)),
                    Method=c("DESC",rep(c("DESC","Seurat3.0","CCA","MNN"),each=3),rep("scVI",times=4),
                             rep("BERMUDA0.90",time=3),rep("BERMUDA0.85",times=3),rep("scanorama",times=3)))
ARI_data=subset(ARI_data,!Method%in%c("BERMUDA0.85"))
ARI_data$Method=factor(ARI_data$Method,levels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA0.90","scanorama"),
                       labels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA","scanorama"))
                          
p_ari=ggplot(data=ARI_data,aes(x=factor(Method),y=ARI,fill=UseBatchID,width=0.85))+
  geom_bar(position= position_dodge(width = 0.8,preserve = "single"),stat="identity")+
  xlab("")+
  ylab("Adjusted Rand Index")+
  scale_y_continuous(expand = c(0,0),breaks=c(0,0.25,0.5,0.75,1.0),labels=c("0.0","0.25","0.50","0.75","1.00"),limits=c(0,1.02))+
  theme(axis.text.x = element_text(angle=60,hjust=1))+
  ggtitle("")+
  #scale_fill_manual(values=colors_use)+
  theme(legend.key.size = unit(0.6,"cm"))
theme_use=theme(legend.key.size = unit(1,"cm"),
                legend.key.width = unit(0,'cm'),
                legend.text = element_text(size=16),
                legend.title = element_text(size=20),
                legend.key.height = unit(1.0,"cm"),
                axis.text.x = element_blank(),
                axis.text.y = element_blank())

theme_use2=theme(legend.key.size = unit(0.5,"cm"),
                  legend.text = element_text(size=8),
                  legend.key.height = unit(0.5,"cm"),
                  legend.title = element_text(size=12),
                 axis.text.x = element_blank(),
                axis.text.y = element_blank())

theme_use_feature=theme(legend.justification=0.5,
                legend.key.size = unit(1,"cm"),
                legend.text = element_text(size=25),
                legend.title = element_text(size=30),
                legend.key.height = unit(1.3,"cm"),
                axis.text.x = element_blank(),
                axis.text.y=element_blank())
#pp1=egg::ggarrange(pdesc_s_3+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "bottom"),
#                 pdesc_s_5+theme_use2+ggtitle("DESC")+coord_cartesian()+theme(legend.text = element_text(size=15),legend.key = ),
#                 pdesc_s_4+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "bottom"),
#                 ncol=2,draw=F)
p1b=pdesc_s_3+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "bottom")
p1d= pdesc_s_4+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "bottom")
p1c= pdesc_s_5+theme_use2+ggtitle("DESC")+coord_cartesian()+theme(legend.text = element_text(size=15))

p0=p_ari+theme(legend.text = element_text(size=15),
               #legend.key.size = unit(0.5,"cm"),
               #legend.key.height = unit(0.5,"cm"),
                legend.title = element_blank(),
               axis.text.x = element_text(angle = 30, size = 15, hjust = 1,color="black"),
  axis.text.y = element_text(angle = 0, vjust = 0, size = 15, hjust = 0.5,color="black"),
  axis.title.x = element_text(angle = 0, vjust = 1, size = 15, hjust = 0.5),
  axis.title.y = element_text(angle = 90, vjust = 1, size = 15, hjust = 0.5),
  plot.title = element_text(hjust=0.5,size=12,face="bold",lineheight=0.8,color="black"),
  panel.background=element_rect(fill = "white", colour = "grey50"))
fig_height=20.6
workflow=ggdraw()+draw_image(magick::image_read("~/Documents/DESC_paper_prepare/DESC_paper_final/formal_revised/workflow.png"),scale=1)# width/heigth=201/133.4
p_1=ggdraw()+
  draw_plot(workflow,x=0.,y=10/fig_height,height=10.59/fig_height,width=1)+
  draw_plot(p1b,x=0.01,y=5/fig_height,height = 5/fig_height,width=5.5/16)+
  draw_plot(p1c,x=6.2/16,y=5/fig_height+0.035,height = 5/fig_height-0.035,width=9.8/16)+
  draw_plot(p1d,x=0.01,y=0,height = 5/fig_height,width=5.5/16)+
  draw_plot(p0,x=6.2/16,y=0,height = 5/fig_height-0.015,width=9.5/16)+
  draw_label("a",x=0.01,y=1,hjust=0.5,vjust=1,fontface = "bold",size=30)+
  draw_label("b",x=0.01,y=10/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=30)+
  draw_label("c",x=6.2/16,y=10/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=30)+
  draw_label("d",x=0.01,y=5/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=30)+
  draw_label("e",x=6.2/16,y=5/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=30)
#Figure 1
ggsave(file.path(fig_path,"Figure.1.tiff"),p_1,width=16,height = 20.6,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.1.pdf"),p_1,width=16,height = 20.6,dpi=300)
ggsave(file.path(fig_path,"Figure.1.eps"),p_1,width=16,height = 20.6,dpi=300)
p_1

Figure 10 (left)

#ARI
suppressPackageStartupMessages(library(dplyr))
color_tmp=c('#B903AA','#1CE6FF','#FF34FF','#FF4A46','#008941','#006FA6','#7A4900','#63FFAC','#B79762','#004D43','#8FB0FF','#997D87','#5A0007','#809693','#FEFFE6','#1B4400','#4FC601','#3B5DFF','#4A3B53','#FF2F80','#61615A','#BA0900','#6B7900','#00C2A0','#FFAA92','#FF90C9','#B903AA')

ARI_data0=ARI_data
tmp0=as.character(ARI_data$Method)
tmp0[tmp0%in%c("BERMUDA0.90","BERMUDA0.85")]="BERMUDA"
ARI_data0$Method=tmp0

zz=ARI_data0%>%group_by(Method)%>%summarise(mean=mean(ARI),sd=sd(ARI))
zz$Method=factor(zz$Method,levels=c("DESC","scVI","scanorama","MNN","BERMUDA","Seurat3.0","CCA"))
color_usetmp=color_tmp[1:7]
names(color_usetmp)=c("DESC","scVI","scanorama","MNN","BERMUDA","Seurat3.0","CCA")
pp=ggplot()+geom_bar(data=zz,aes(x=Method,y=mean,fill=Method),stat="identity",width=0.4)+
  geom_point(data=zz,aes(x=Method,y=mean),color="black")+
  scale_fill_manual(values=color_usetmp,guide=F)+
  geom_errorbar(data=zz,aes(x=Method,ymin=mean-sd,ymax=mean+sd),width=0.2,size=1)+
  theme(axis.text.x = element_text(angle=60,hjust=1))+
  theme(axis.title.y = element_text(angle=-90,hjust=0.5))+
  #scale_y_continuous(expand = c(0,0,0.05,0),labels=scales::comma,position = "left",breaks = c(0,10,20,30,40),limits = c(0,40))+
  scale_x_discrete(expand=c(0.1,0.,0.05,0))+
  scale_y_continuous(expand = c(0,0),breaks=c(0,0.25,0.5,0.75,1.0),labels=c("0.0","0.25","0.50","0.75","1.00"),limits=c(0,1.1))+
  geom_text(aes(x=0.6,y=c(0.25,0.5,0.75,1.0,1.05)-0.02,label=c("0.25","0.50","0.75","1.00","ARI")))+
  theme(legend.position="none")+xlab("")+theme_void()+
  #geom_hline(aes(yintercept=c(0.25,0.5,0.75,1.0)),linetype="dotted",alpha=1,color="darkgrey")
  theme( panel.grid.major.y = element_line(size=0.3,linetype = "dotted"))
#ggsave("~/Documents/DESC_paper_prepare/DESC_paper_v3/figures/figure_main/ari_table.pdf",pp,width = 9,height=3.7,dpi=300)
#Figure 10
ggsave(file.path(fig_path,"Fig.10.left.tiff"),pp,width=9,height = 3.7,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Fig.10.left.pdf"),pp,width=9,height = 3.7,dpi=300)
ggsave(file.path(fig_path,"Fig.10.left.eps"),pp,width=9,height = 3.7,dpi=300)
pp

Figure 2

data_KL0=do.call("rbind",lapply(KL_list,function(x) do.call("rbind",x)))
data_rect=data.frame(xmin=seq(0.5,6.5,by=1),
                     xmax=seq(1.5,7.5,by=1),
                     ymin=-Inf,
                     color=rep(c("#E8F2F4","#8FB0FF"),length=7),
                     ymax=Inf,stringsAsFactors = F)

p_list_KL=lapply(c("macaque_id","region","sample"),function(zz){
  data_KL=subset(data_KL0, !Method%in%"BERMUDA0.85", select=c(zz,"Method","Group"))
  df=reshape2::melt(data_KL,id.vars=c("Method","Group"))
  #df$Method=factor(df$Method,levels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA0.90","BERMUDA0.85","scanorama"))
  df$Method=factor(df$Method,levels=c("DESC","Seurat3.0","CCA","MNN","scVI","BERMUDA","scanorama"))
  colnames(df)=c("Method","Group","variable","value")
  df$useBatch=df$Group
  p_KL=ggplot()+
     geom_boxplot(data=df,aes(x=Method,y=value,color=useBatch),position= position_dodge(preserve = "single"))+
    xlab("")+
    scale_x_discrete(expand = c(0,0,0,0))+
    #scale_y_continuous(breaks = 4)+
     ylab("KL divergence")+
     theme_self+theme(
     #aspect.ratio = 1,
     axis.text.x   = element_text(angle = 30,hjust=1,vjust=1),
     panel.border =element_blank(),
     axis.line = element_line())+
     ggtitle(paste0("KL divergence of ", zz))+
    geom_rect(data=data_rect,aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax,fill=color),alpha=0.05)+
    scale_fill_discrete(guide=F)
  return(p_KL)
  }
)
p_KL=egg::ggarrange(p_list_KL[[1]]+theme(legend.position = "none"),
                    p_list_KL[[2]]+theme(legend.position = "none",axis.title.y = element_blank()),
                    p_list_KL[[3]]+theme(axis.title.y = element_blank()),ncol=3,draw=F)
p_KL

p_legend=get_legend(pdesc_4+theme(legend.position = "top",legend.title = element_blank(),
                                  legend.spacing.x = unit(1.5,"cm"),
                                  legend.text = element_text(size=25))+
                      guides(colour = guide_legend(override.aes = list(alpha = 1, size=15),nrow=1)))


#pp2=egg::ggarrange(
pp2=list(pdesc_r_4+theme_use+ggtitle("DESC")+coord_cartesian()+theme(legend.position = "none"),
                 pcca3.0_r_4+theme_use+ggtitle("Seurat3.0")+coord_cartesian()+theme(legend.position = "none"),
                 pcca_r_4+theme_use+ggtitle("CCA")+coord_cartesian()+theme(legend.position = "none"),
                 pmnn_r_4+theme_use+ggtitle("MNN")+coord_cartesian()+theme(legend.position = "none"),
                 pscvi_r_4+theme_use+ggtitle("scVI")+coord_cartesian()+theme(legend.position = "none"),
                 #pscvi_4+theme_use+ggtitle("scVI-no batchid")+coord_cartesian()+theme(legend.position = "none"),
                 pbermuda0.90_r_4+theme_use+ggtitle("BERMUDA")+coord_cartesian()+theme(legend.position = "none"),
                 #pbermuda0.85_m_4+theme_use+ggtitle("BERMUDA0.85")+coord_cartesian()+theme(legend.position = "none"),
                 pscanorama_r_4+theme_use+ggtitle("scanorama")+coord_cartesian()+theme(legend.position = "none"),
                 plot_grid(p_legend))
p2b=egg::ggarrange(plots=pp2,ncol = 3,draw = F)
## adding dummy grobs
#layout=matrix(1:9,3,3,byrow =T)
#ggsave("~/Documents/DESC_paper_prepare/DESC_paper_v3/figures/figure_main/fig2b.tiff",
#       grid.arrange(grobs=pp2,layout_matrix=layout),
#       width=21,height =17,dpi=200)
fig_height=25
p2=ggdraw()+
  draw_plot(p_KL,x=0.01,y=18/fig_height,height=7/fig_height,width=1)+
  draw_plot(p2b,x=0.02,y=0,height = 17.5/fig_height,width=0.94)+
  draw_label("a",x=0.01,y=1,hjust=0.5,vjust=1,fontface = "bold",size=35)+
  draw_label("b",x=0.01,y=17.5/fig_height,hjust=0.5,vjust=1,fontface = "bold",size=35)
## Figure 2
ggsave(file.path(fig_path,"Figure.2.tiff"),p2,width=24,height = 25,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.2.pdf"),p2,width=24,height = 25,dpi=300)
ggsave(file.path(fig_path,"Figure.2.eps"),p2,width=24,height = 25,dpi=300)
p2

Figure 3

theme_use=theme(legend.key.size = unit(1,"cm"),
                legend.key.width = unit(0,'cm'),
                legend.text = element_text(size=16),
                legend.title = element_text(size=20),
                legend.key.height = unit(1.0,"cm"),
                axis.text.x = element_blank(),
                axis.text.y = element_blank(),
                axis.line.x = element_blank(),
                axis.line.y=element_blank(),
                axis.title.y=element_blank(),
                axis.title.x =element_blank())

theme_use2=theme(legend.key.size = unit(0.6,"cm"),
                  legend.text = element_text(size=10),
                  legend.key.height = unit(0.6,"cm"),
                  legend.title = element_text(size=20),
                axis.text.x = element_blank(),
                axis.text.y = element_blank(),
                axis.line.x = element_blank(),
                axis.line.y=element_blank(),
                axis.title.y=element_blank(),
                axis.title.x =element_blank() )
theme_xx= theme(legend.position = "bottom")
  

p3_tmp=egg::ggarrange(pdesc_2+theme_use2+ylab("DESC")+
                       theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                             legend.title = element_blank())+theme_xx+
                       guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
                       coord_cartesian(),
                 pdesc_3+theme_use+theme_xx+
                   guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
                   coord_cartesian(),
                 pdesc_4+theme_use+ggtitle("regionID")+theme_xx+
                   guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
                   coord_cartesian(),
                 pdesc_5+theme_use2+theme(plot.title = element_text(color="black",face="bold"))+theme_xx+
                   guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
                   coord_cartesian(),
                 pscvi_2+theme_use2+ylab("scVI")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+theme_xx+
                   guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
                   coord_cartesian(),
                 pscvi_3+theme_use+ggtitle("")+theme_xx+guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
                   coord_cartesian(),
                 pscvi_4+theme_use+ggtitle("")+theme_xx+guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
                   coord_cartesian(),
                 pscvi_5+theme_use2+ggtitle("")+theme_xx+
                   guides(colour = guide_legend(override.aes = list(alpha = 1, size=5),title.position = "top",ncol=4))+
                   coord_cartesian(),
                 ncol=4,draw=F,
                 labels = c("a","","","","b","","",""),
                 label.args = list(gp = grid::gpar(cex =3.5,color="black",face="bold"),just="bottom"))
## Figure 3
ggsave(file.path(fig_path,"Figure.3.tiff"),p3_tmp,width=24,height = 17,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.3.pdf"),p3_tmp,width=24,height = 17,dpi=300)
ggsave(file.path(fig_path,"Figure.3.eps"),p3_tmp,width=24,height = 17,dpi=300)
p3_tmp

Supplementary Figure 1

p_tmp=egg::ggarrange(
                 #desc
                 pdesc_m_2+theme_use2+ylab("DESC")+
                       theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                             legend.title = element_blank())+coord_cartesian(),
                 pdesc_m_3+theme_use+theme(plot.title = element_text(color="darkblue",face="bold"))+coord_cartesian(),
                 pdesc_m_4+theme_use+ggtitle("regionID")+coord_cartesian(),
                 pdesc_m_5+theme_use2+coord_cartesian(),
      
                 ##cca3.0 with nobatch
                 pcca3.0_m_2+theme_use2+ylab("Seurat3.0")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pcca3.0_m_3+theme_use+ggtitle("")+coord_cartesian(),
                 pcca3.0_m_4+theme_use+ggtitle("")+coord_cartesian(),
                 pcca3.0_m_5+theme_use2+ggtitle("")+coord_cartesian(),
                 ##cca 
                 pcca_m_2+theme_use2+ylab("CCA")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pcca_m_3+theme_use+ggtitle("")+coord_cartesian(),
                 pcca_m_4+theme_use+ggtitle("")+coord_cartesian(),
                 pcca_m_5+theme_use2+ggtitle("")+coord_cartesian(),
                 ##MNN 
                 pmnn_m_2+theme_use2+ylab("MNN")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pmnn_m_3+theme_use+ggtitle("")+coord_cartesian(),
                 pmnn_m_4+theme_use+ggtitle("")+coord_cartesian(),
                 pmnn_m_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #scvi
                 pscvi_m_2+theme_use2+ylab("scVI")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pscvi_m_3+theme_use+ggtitle("")+coord_cartesian(),
                 pscvi_m_4+theme_use+ggtitle("")+coord_cartesian(),
                 pscvi_m_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #BERMUDA0.90
                 pbermuda0.90_m_2+theme_use2+ylab("BERMUDA")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pbermuda0.90_m_3+theme_use+ggtitle("")+coord_cartesian(),
                 pbermuda0.90_m_4+theme_use+ggtitle("")+coord_cartesian(),
                 pbermuda0.90_m_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #BERMUDA0.85
                 #pbermuda0.85_m_2+theme_use2+ylab("BERMUDA0.85")+ggtitle("")+
                #   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                #         legend.title = element_blank())+coord_cartesian(),
                # pbermuda0.85_m_3+theme_use+ggtitle("")+coord_cartesian(),
                # pbermuda0.85_m_4+theme_use+ggtitle("")+coord_cartesian(),
                #pbermuda0.85_m_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #scanorama
                 pscanorama_m_2+theme_use2+ylab("scanorama")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pscanorama_m_3+theme_use+ggtitle("")+coord_cartesian(),
                 pscanorama_m_4+theme_use+ggtitle("")+coord_cartesian(),
                 pscanorama_m_5+theme_use2+ggtitle("")+coord_cartesian(),
                 ncol=4,draw=F)
## Supplementary Figure 1
ggsave(file.path(fig_path,"Figure.S1.tiff"),p_tmp,width=33,height = 40,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.S1.pdf"),p_tmp,width=33,height = 40,dpi=300)
ggsave(file.path(fig_path,"Figure.S1.eps"),p_tmp,width=33,height = 40,dpi=300)
p_tmp

Supplementary Figure 2

p_tmp=egg::ggarrange(pdesc_r_2+theme_use2+ylab("DESC")+
                       theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                             legend.title = element_blank())+coord_cartesian(),
                 pdesc_r_3+theme_use+coord_cartesian(),
                 pdesc_r_4+theme_use+ggtitle("regionID")+theme(plot.title = element_text(color="darkblue",face="bold"))+coord_cartesian(),
                 pdesc_r_5+theme_use2+coord_cartesian(),
                 pcca3.0_r_2+theme_use2+ylab("Seurat3.0")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pcca3.0_r_3+theme_use+ggtitle("")+coord_cartesian(),
                 pcca3.0_r_4+theme_use+ggtitle("")+coord_cartesian(),
                 pcca3.0_r_5+theme_use2+ggtitle("")+coord_cartesian(),
                 pcca_m_2+theme_use2+ylab("CCA")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pcca_r_3+theme_use+ggtitle("")+coord_cartesian(),
                 pcca_r_4+theme_use+ggtitle("")+coord_cartesian(),
                 pcca_r_5+theme_use2+ggtitle("")+coord_cartesian(),
                 pmnn_r_2+theme_use2+ylab("MNN")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pmnn_r_3+theme_use+ggtitle("")+coord_cartesian(),
                 pmnn_r_4+theme_use+ggtitle("")+coord_cartesian(),
                 pmnn_r_5+theme_use2+ggtitle("")+coord_cartesian(),
                 pscvi_r_2+theme_use2+ylab("scVI")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pscvi_r_3+theme_use+ggtitle("")+coord_cartesian(),
                 pscvi_r_4+theme_use+ggtitle("")+coord_cartesian(),
                 pscvi_r_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #BERMUDA0.90
                 pbermuda0.90_r_2+theme_use2+ylab("BERMUDA")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pbermuda0.90_r_3+theme_use+ggtitle("")+coord_cartesian(),
                 pbermuda0.90_r_4+theme_use+ggtitle("")+coord_cartesian(),
                 pbermuda0.90_r_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #BERMUDA0.85
                 #pbermuda0.85_r_2+theme_use2+ylab("BERMUDA0.85")+ggtitle("")+
                #   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                #         legend.title = element_blank())+coord_cartesian(),
                #pbermuda0.85_r_3+theme_use+ggtitle("")+coord_cartesian(),
                #pbermuda0.85_r_4+theme_use+ggtitle("")+coord_cartesian(),
                #pbermuda0.85_r_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #scanorama
                 pscanorama_r_2+theme_use2+ylab("scanorama")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pscanorama_r_3+theme_use+ggtitle("")+coord_cartesian(),
                 pscanorama_r_4+theme_use+ggtitle("")+coord_cartesian(),
                 pscanorama_r_5+theme_use2+ggtitle("")+coord_cartesian(),
                 ncol=4,draw=F)
## Supplementary Figure 2
ggsave(file.path(fig_path,"Figure.S2.tiff"),p_tmp,width=33,height = 40,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.S2.pdf"),p_tmp,width=33,height = 40,dpi=300)
ggsave(file.path(fig_path,"Figure.S2.eps"),p_tmp,width=33,height = 40,dpi=300)
p_tmp

Supplementary Figure 3

-3. Taking sample as batch

p_tmp=egg::ggarrange(pdesc_s_2+theme_use2+ylab("DESC")+
                       theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                             legend.title = element_blank())+coord_cartesian(),
                 pdesc_s_3+theme_use+coord_cartesian(),
                 pdesc_s_4+theme_use+ggtitle("regionID")+coord_cartesian(),
                 pdesc_s_5+theme_use2+theme(plot.title = element_text(color="darkblue",face="bold"))+coord_cartesian(),
                 pcca3.0_s_2+theme_use2+ylab("Seurat3.0")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pcca3.0_s_3+theme_use+ggtitle("")+coord_cartesian(),
                 pcca3.0_s_4+theme_use+ggtitle("")+coord_cartesian(),
                 pcca3.0_s_5+theme_use2+ggtitle("")+coord_cartesian(),
                 pcca_s_2+theme_use2+ylab("CCA")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pcca_s_3+theme_use+ggtitle("")+coord_cartesian(),
                 pcca_s_4+theme_use+ggtitle("")+coord_cartesian(),
                 pcca_s_5+theme_use2+ggtitle("")+coord_cartesian(),
                 pmnn_s_2+theme_use2+ylab("MNN")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pmnn_s_3+theme_use+ggtitle("")+coord_cartesian(),
                 pmnn_s_4+theme_use+ggtitle("")+coord_cartesian(),
                 pmnn_s_5+theme_use2+ggtitle("")+coord_cartesian(),
                 pscvi_s_2+theme_use2+ylab("scVI")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pscvi_s_3+theme_use+ggtitle("")+coord_cartesian(),
                 pscvi_s_4+theme_use+ggtitle("")+coord_cartesian(),
                 pscvi_s_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #BERMUDA0.90
                 pbermuda0.90_s_2+theme_use2+ylab("BERMUDA")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pbermuda0.90_s_3+theme_use+ggtitle("")+coord_cartesian(),
                 pbermuda0.90_s_4+theme_use+ggtitle("")+coord_cartesian(),
                 pbermuda0.90_s_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #BERMUDA0.85
                 #pbermuda0.85_s_2+theme_use2+ylab("BERMUDA0.85")+ggtitle("")+
                 #  theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                 #        legend.title = element_blank())+coord_cartesian(),
                 #pbermuda0.85_s_3+theme_use+ggtitle("")+coord_cartesian(),
                 #pbermuda0.85_s_4+theme_use+ggtitle("")+coord_cartesian(),
                 #pbermuda0.85_s_5+theme_use2+ggtitle("")+coord_cartesian(),
                 #scanorama
                 pscanorama_s_2+theme_use2+ylab("scanorama")+ggtitle("")+
                   theme(axis.title.y=element_text(size=30,angle = 90,color="red",face = "bold"),
                         legend.title = element_blank())+coord_cartesian(),
                 pscanorama_s_3+theme_use+ggtitle("")+coord_cartesian(),
                 pscanorama_s_4+theme_use+ggtitle("")+coord_cartesian(),
                 pscanorama_s_5+theme_use2+ggtitle("")+coord_cartesian(),
                 ncol=4,draw=F)
## Supplementary Figure 3
ggsave(file.path(fig_path,"Figure.S3.tiff"),p_tmp,width=33,height = 40,dpi=300,compression="lzw")
ggsave(file.path(fig_path,"Figure.S3.pdf"),p_tmp,width=33,height = 40,dpi=300)
ggsave(file.path(fig_path,"Figure.S3.eps"),p_tmp,width=33,height = 40,dpi=300)
p_tmp