# load libraries
library(Seurat)
library(scrattch.hicat)
library(scrattch.vis)
library(RColorBrewer)
library(dendextend)
library(dplyr)
library(matrixStats)
library(Matrix)
# Set ggplot theme
theme_set(theme_classic())
# Extract Subpallial neuron clusters
gaba.clusters <- paste0( c("LN.GABA."),c("1", "7", "8", "9", "10", "11"))
GABA.LN.data <- SubsetData(Allcells.data, ident.use = gaba.clusters, subset.raw = T, do.clean = F)
# Exclude one outlier cell on Spring coordinates
outlier <- rownames(subset(GABA.LN.data@dr$spring@cell.embeddings, GABA.LN.data@dr$spring@cell.embeddings[,2] == max(GABA.LN.data@dr$spring@cell.embeddings[,2])))
Celltokeep <- GABA.LN.data@meta.data$Barcodes[GABA.LN.data@meta.data$Barcodes != outlier]
GABA.LN.data <- SubsetData(GABA.LN.data, cells.use = Celltokeep , subset.raw = T, do.clean = F)
rm(list = ls()[!ls() %in% c("GABA.LN.data")])
# Load the full dataset
Allcells.data <- readRDS("./QC.filtered.cells.RDS")
# Transfer the identities
Rename.Clust <- function(Clustdata, RawQCdata) {
unClustered.cells <- RawQCdata@meta.data$Barcodes
RawQCdata <- SetIdent(RawQCdata, cells.use = unClustered.cells, ident.use = "All.Unclustered.Cells")
for(i in unique(Clustdata@meta.data$Cluster.ident)){
New.ident <- i
Barcodes <- rownames(subset(Clustdata@meta.data, Clustdata@meta.data$Cluster.ident == i))
print(paste0("Cluster_",i,": ",length(Barcodes), " Cells"))
Barcodes <- Barcodes[Barcodes %in% rownames(RawQCdata@meta.data)]
RawQCdata <- SetIdent(RawQCdata, cells.use = Barcodes ,ident.use = paste0("LN.Glut.",i))
}
return(RawQCdata)
}
Allcells.data <- Rename.Clust(Clustdata = GABA.LN.data, RawQCdata = Allcells.data)
## [1] "Cluster_LN.GABA.1: 26 Cells"
## [1] "Cluster_LN.GABA.7: 45 Cells"
## [1] "Cluster_LN.GABA.8: 23 Cells"
## [1] "Cluster_LN.GABA.9: 16 Cells"
## [1] "Cluster_LN.GABA.10: 112 Cells"
## [1] "Cluster_LN.GABA.11: 54 Cells"
colors <- c("#c773a7", "#b79f0b", "#3ca73f", "#31b6bd", "#ec756d", "#7293c8")
# Prepare annotation for hicat pipeline
colorsident <- cbind(ident = unique(as.character(GABA.LN.data@ident)),
colors = colors,
id = unique(as.character(GABA.LN.data@ident)))
# Create annotation data.frame
anno.df <- as.data.frame(cbind(
sample_name = row.names(GABA.LN.data@meta.data),
primary_type_id = colorsident[match(as.character(GABA.LN.data@ident), colorsident[,1]),3],
primary_type_label = as.character(GABA.LN.data@ident),
primary_type_color = colorsident[match(as.character(GABA.LN.data@ident), colorsident[,1]),2]
))
# Make a data.frame of unique cluster id, type, color, and broad type
cl.df <- anno.df %>%
select(primary_type_id, primary_type_label, primary_type_color) %>%
unique()
colnames(cl.df)[1:3] <- c("cluster_id", "cluster_label", "cluster_color")
# Sort by cluster_id
cl.df <- arrange(cl.df, cluster_id)
row.names(cl.df) <- cl.df$cluster_id
cl.fact <- setNames(factor(anno.df$primary_type_id), anno.df$sample_name)
# Filter genes
num.cells <- Matrix::rowSums(GABA.LN.data@data > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 10)])
GenesToRemove <- c(grep(pattern = "(^Rpl|^Rps|^Mrp)", x = genes.use, value = TRUE), grep(pattern = "^mt-", x = genes.use, value = TRUE), "Xist")
genes.use <- genes.use[!genes.use %in% GenesToRemove] ; rm(GenesToRemove, num.cells)
GABA.LN.data@raw.data <- GABA.LN.data@raw.data[genes.use, ]
GABA.LN.data <- NormalizeData(object = GABA.LN.data,
normalization.method = "LogNormalize",
scale.factor = round(median(GABA.LN.data@meta.data$nUMI)),
display.progress = F)
# Find all var genes
GABA.LN.data <- FindVariableGenes(object = GABA.LN.data,
mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.02,
x.high.cutoff = 3,
y.cutoff = 1,
do.plot = F, display.progress = F)
dgeMatrix_count <- as.matrix(GABA.LN.data@raw.data)[rownames(GABA.LN.data@raw.data) %in% GABA.LN.data@var.genes,]
dgeMatrix_cpm <- cpm(dgeMatrix_count)
norm.dat <- log2(dgeMatrix_cpm + 1) ; rm(dgeMatrix_cpm)
Data.matrix <- list(raw.dat=dgeMatrix_count, norm.dat=norm.dat) ; attach(Data.matrix)
rm(dgeMatrix_count,norm.dat)
Scrattch.hicat perform hierarchical clustering on a cluster correlation matrix based on median expression values for the top 50 most DEGs between every pair of clusters. It estimates branch confidence level using a bootstrap approach implemented by the pvclust package
# Build the dendrogram
dend.result <- build_dend(cl.med[,levels(cl.fact)],
l.color= setNames(as.character(cl.df$cluster_color), row.names(cl.df)),
nboot = 100)
## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.8)... Done.
## Bootstrap (r = 0.9)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.3)... Done.
## Bootstrap (r = 1.4)... Done.
# Attach cluster labels to the leaves of the tree
dend.labeled <- dend.result$dend
labels(dend.labeled) <- cl.df[labels(dend.labeled), "cluster_label"]
# Rotate dendrogramme leafs
NewOrder <- paste0("LN.GABA.", c(1,11,8,7,10,9))
cl.fact <- factor(cl.fact, levels = NewOrder)
l.rank <- setNames(1:nrow(cl.df), NewOrder) #set the cluster order
# Color of the leaf nodes.
l.color <- setNames(as.character(cl.df$cluster_color), row.names(cl.df))
dend.result <- build_dend(cl.med[,levels(cl.fact)],
l.rank,
l.color=l.color,
nboot = 100)
## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.8)... Done.
## Bootstrap (r = 0.9)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.3)... Done.
## Bootstrap (r = 1.4)... Done.
data <- cbind(sample_name = colnames(GABA.LN.data@data),
as.data.frame(t(as.matrix(GABA.LN.data@data))))
Selected.markers <- c("Foxg1","Bcl11b","Gad1", "Gad2","Dlx1","Dlx2",
"Dlx5","Dlx6", "Lhx6","Sst","Calb1","Gm17750",
"Npy", "Nxph1", "Rprm", "Elmo1","Arx","Meis2",
"Rbfox1","Zfp503","Pou3f1", "Isl1", "Ebf1",
"Dlc1","Dlk1","Nefm","Nefl","Sybu","Lypd1",
"Parvb", "Fgf15", "Zic1", "Th","Pnoc", "Crabp2",
"Klhdc8b","Pax6", "Sp8","Six3","Tshz2", "Phlda1")
sample_bar_plot(data,
anno.df,
genes = Selected.markers,
group_order = levels(cl.fact),
grouping = "primary_type",
log_scale = FALSE,
font_size = 7,
label_height = 10,
label_type = "angle",
bg_color ="#f7f7f7")
FeaturePlot(object = GABA.LN.data,
features.plot = c("Pax6", "Sp8", "Six3", "Isl1", "Ebf1", "Dlk1", "Tshz2", "Th", "Lhx6"),
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "spring",
no.legend = T,
nCol = 3,
overlay = F,
dark.theme = F)
## [1] "30 novembre, 2020, 11,36"
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pvclust_2.2-0 matrixStats_0.55.0 dendextend_1.12.0
## [4] RColorBrewer_1.1-2 scrattch.vis_0.0.210 purrr_0.3.3
## [7] ggbeeswarm_0.6.0 dplyr_0.8.3 scrattch.hicat_0.0.16
## [10] Seurat_2.3.4 Matrix_1.2-17 cowplot_1.0.0
## [13] ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 class_7.3-17
## [4] modeltools_0.2-22 ggridges_0.5.1 mclust_5.4.5
## [7] htmlTable_1.13.2 base64enc_0.1-3 rstudioapi_0.11
## [10] proxy_0.4-23 farver_2.0.1 npsurv_0.4-0
## [13] flexmix_2.3-15 bit64_4.0.2 codetools_0.2-16
## [16] splines_3.6.3 R.methodsS3_1.7.1 lsei_1.2-0
## [19] robustbase_0.93-5 knitr_1.26 zeallot_0.1.0
## [22] jsonlite_1.7.0 Formula_1.2-3 ica_1.0-2
## [25] cluster_2.1.0 kernlab_0.9-29 png_0.1-7
## [28] R.oo_1.23.0 compiler_3.6.3 httr_1.4.1
## [31] backports_1.1.5 assertthat_0.2.1 lazyeval_0.2.2
## [34] lars_1.2 acepack_1.4.1 htmltools_0.5.0
## [37] tools_3.6.3 igraph_1.2.5 gtable_0.3.0
## [40] glue_1.4.1 RANN_2.6.1 reshape2_1.4.3
## [43] Rcpp_1.0.5 vctrs_0.2.0 gdata_2.18.0
## [46] ape_5.3 nlme_3.1-141 iterators_1.0.12
## [49] fpc_2.2-3 gbRd_0.4-11 lmtest_0.9-37
## [52] xfun_0.18 stringr_1.4.0 lifecycle_0.1.0
## [55] irlba_2.3.3 gtools_3.8.1 DEoptimR_1.0-8
## [58] MASS_7.3-53 zoo_1.8-6 scales_1.1.0
## [61] doSNOW_1.0.18 parallel_3.6.3 yaml_2.2.1
## [64] reticulate_1.13 pbapply_1.4-2 gridExtra_2.3
## [67] rpart_4.1-15 segmented_1.0-0 latticeExtra_0.6-28
## [70] stringi_1.4.6 highr_0.8 foreach_1.4.7
## [73] checkmate_1.9.4 caTools_1.17.1.2 bibtex_0.4.2
## [76] Rdpack_0.11-0 SDMTools_1.1-221.1 rlang_0.4.7
## [79] pkgconfig_2.0.3 dtw_1.21-3 prabclus_2.3-1
## [82] bitops_1.0-6 evaluate_0.14 lattice_0.20-41
## [85] ROCR_1.0-7 labeling_0.3 htmlwidgets_1.5.1
## [88] bit_4.0.4 tidyselect_0.2.5 plyr_1.8.4
## [91] magrittr_1.5 R6_2.4.1 snow_0.4-3
## [94] gplots_3.0.1.1 Hmisc_4.3-0 pillar_1.4.2
## [97] foreign_0.8-72 withr_2.1.2 fitdistrplus_1.0-14
## [100] mixtools_1.1.0 survival_2.44-1.1 nnet_7.3-14
## [103] tsne_0.1-3 tibble_2.1.3 crayon_1.3.4
## [106] hdf5r_1.3.2.9000 KernSmooth_2.23-15 rmarkdown_2.5
## [109] viridis_0.5.1 grid_3.6.3 data.table_1.12.6
## [112] metap_1.1 digest_0.6.25 diptest_0.75-7
## [115] tidyr_1.0.0 R.utils_2.9.0 stats4_3.6.3
## [118] munsell_0.5.0 viridisLite_0.3.0 beeswarm_0.2.3
## [121] vipor_0.4.5
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩