#load library
library(Seurat)
library(scrattch.hicat)
library(scrattch.vis)
library(RColorBrewer)
library(dendextend)
library(dplyr)
library(matrixStats)
library(Matrix)
#Set ggplot theme
theme_set(theme_classic())
# Extract most mature pallial neuron clusters
glut.clusters <- paste0( c("LN.Glut."),c("13", "1", "14", "22", "16", "19", "24", "26", "20", "21"))
# Extract pallial neuron clusters
Glut.LN.data <- SubsetData(Allcells.data, ident.use = glut.clusters, subset.raw = T, do.clean = F)
rm(list = ls()[!ls() %in% c("Glut.LN.data")])
colors <- c("#9ec22f", "#ebcb2e", "#a9961b", "#e7823a", "#cc3a1b", "#d14c8d", "#046c9a", "#4990c9", "#4cabdc", "#5ab793")
# Prepare annotation for hicat pipeline
colorsident <- cbind(ident = unique(as.character(Glut.LN.data@ident)),
colors = colors,
id = unique(as.character(Glut.LN.data@ident)))
# Create annotation data.frame
anno.df <- as.data.frame(cbind(
sample_name = row.names(Glut.LN.data@meta.data),
primary_type_id = colorsident[match(as.character(Glut.LN.data@ident), colorsident[,1]),3],
primary_type_label = as.character(Glut.LN.data@ident),
primary_type_color = colorsident[match(as.character(Glut.LN.data@ident), colorsident[,1]),2]
))
# Make a data.frame of unique cluster id, type, color, and broad type
cl.df <- anno.df %>%
select(primary_type_id,
primary_type_label,
primary_type_color) %>%
unique()
colnames(cl.df)[1:3] <- c("cluster_id", "cluster_label", "cluster_color")
# Sort by cluster_id
cl.df <- arrange(cl.df, cluster_id)
row.names(cl.df) <- cl.df$cluster_id
cl.fact <- setNames(factor(anno.df$primary_type_id), anno.df$sample_name)
# Filter genes
num.cells <- Matrix::rowSums(Glut.LN.data@data > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 10)])
GenesToRemove <- c(grep(pattern = "(^Rpl|^Rps|^Mrp)", x = genes.use, value = TRUE), grep(pattern = "^mt-", x = genes.use, value = TRUE), "Xist")
genes.use <- genes.use[!genes.use %in% GenesToRemove] ; rm(GenesToRemove, num.cells)
Glut.LN.data@raw.data <- Glut.LN.data@raw.data[genes.use, ]
Glut.LN.data <- NormalizeData(object = Glut.LN.data,
normalization.method = "LogNormalize",
scale.factor = round(median(Glut.LN.data@meta.data$nUMI)),
display.progress = F)
# Find all var genes to retrict analysis
Glut.LN.data <- FindVariableGenes(object = Glut.LN.data,
mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.02,
x.high.cutoff = 3,
y.cutoff = 1,
do.plot = F, display.progress = F)
dgeMatrix_count <- as.matrix(Glut.LN.data@raw.data)[rownames(Glut.LN.data@raw.data) %in% Glut.LN.data@var.genes,]
dgeMatrix_cpm <- cpm(dgeMatrix_count)
norm.dat <- log2(dgeMatrix_cpm + 1) ; rm(dgeMatrix_cpm)
Data.matrix <- list(raw.dat=dgeMatrix_count, norm.dat=norm.dat) ; attach(Data.matrix)
rm(dgeMatrix_count,norm.dat)
Scrattch.hicat perform hierarchical clustering on a cluster correlation matrix based on median expression values for the top 50 most DEGs between every pair of clusters. It estimates branch confidence level using a bootstrap approach implemented by the pvclust package
# Build the dendrogram
dend.result <- build_dend(cl.med[,levels(cl.fact)],
l.color= setNames(as.character(cl.df$cluster_color), row.names(cl.df)),
nboot = 100)
## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.8)... Done.
## Bootstrap (r = 0.9)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.3)... Done.
## Bootstrap (r = 1.4)... Done.
# Attach cluster labels
dend.labeled <- dend.result$dend
labels(dend.labeled) <- cl.df[labels(dend.labeled), "cluster_label"]
plot(dend.labeled)
Glut.LN.data@cluster.tree[[1]] <- ape::as.phylo(dend.labeled)
labels(Glut.LN.data@cluster.tree[[1]]) <- cl.df[as.numeric(labels(Glut.LN.data@cluster.tree[[1]])), "cluster_label"]
Glut.LN.data@ident <- factor(Glut.LN.data@ident, levels = paste0( c("LN.Glut."),c("13", "1", "14", "22", "16", "19", "24", "26", "20", "21")))
# For each nodes, find for daughter branches the 10 genes which define them
Branch.markers.list <- Find.nodes.markers(Glut.LN.data, 10)
# Generate a dot plot for each nodes
plot.list <- Node.markers.plots(Glut.LN.data,
Branch.markers.list)
## [[1]]
## [[1]]
## [[1]]
## [[1]]
## [[1]]
## [[1]]
## [[1]]
## [[1]]
## [[1]]
## [1] "30 novembre, 2020, 11,37"
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pvclust_2.2-0 matrixStats_0.55.0 dendextend_1.12.0
## [4] RColorBrewer_1.1-2 scrattch.vis_0.0.210 purrr_0.3.3
## [7] ggbeeswarm_0.6.0 dplyr_0.8.3 scrattch.hicat_0.0.16
## [10] Seurat_2.3.4 Matrix_1.2-17 cowplot_1.0.0
## [13] ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 ellipsis_0.3.0
## [4] class_7.3-17 modeltools_0.2-22 ggridges_0.5.1
## [7] mclust_5.4.5 htmlTable_1.13.2 base64enc_0.1-3
## [10] rstudioapi_0.11 proxy_0.4-23 farver_2.0.1
## [13] npsurv_0.4-0 flexmix_2.3-15 bit64_4.0.2
## [16] codetools_0.2-16 splines_3.6.3 R.methodsS3_1.7.1
## [19] lsei_1.2-0 robustbase_0.93-5 knitr_1.26
## [22] zeallot_0.1.0 jsonlite_1.7.0 Formula_1.2-3
## [25] ica_1.0-2 cluster_2.1.0 kernlab_0.9-29
## [28] png_0.1-7 R.oo_1.23.0 compiler_3.6.3
## [31] httr_1.4.1 backports_1.1.5 assertthat_0.2.1
## [34] lazyeval_0.2.2 lars_1.2 acepack_1.4.1
## [37] htmltools_0.5.0 tools_3.6.3 igraph_1.2.5
## [40] gtable_0.3.0 glue_1.4.1 RANN_2.6.1
## [43] reshape2_1.4.3 Rcpp_1.0.5 vctrs_0.2.0
## [46] gdata_2.18.0 ape_5.3 nlme_3.1-141
## [49] iterators_1.0.12 fpc_2.2-3 gbRd_0.4-11
## [52] lmtest_0.9-37 xfun_0.18 stringr_1.4.0
## [55] lifecycle_0.1.0 irlba_2.3.3 gtools_3.8.1
## [58] DEoptimR_1.0-8 MASS_7.3-53 zoo_1.8-6
## [61] scales_1.1.0 doSNOW_1.0.18 parallel_3.6.3
## [64] yaml_2.2.1 reticulate_1.13 pbapply_1.4-2
## [67] gridExtra_2.3 rpart_4.1-15 segmented_1.0-0
## [70] latticeExtra_0.6-28 stringi_1.4.6 highr_0.8
## [73] foreach_1.4.7 checkmate_1.9.4 caTools_1.17.1.2
## [76] bibtex_0.4.2 Rdpack_0.11-0 SDMTools_1.1-221.1
## [79] rlang_0.4.7 pkgconfig_2.0.3 dtw_1.21-3
## [82] prabclus_2.3-1 bitops_1.0-6 evaluate_0.14
## [85] lattice_0.20-41 ROCR_1.0-7 labeling_0.3
## [88] htmlwidgets_1.5.1 bit_4.0.4 tidyselect_0.2.5
## [91] plyr_1.8.4 magrittr_1.5 R6_2.4.1
## [94] snow_0.4-3 gplots_3.0.1.1 Hmisc_4.3-0
## [97] pillar_1.4.2 foreign_0.8-72 withr_2.1.2
## [100] fitdistrplus_1.0-14 mixtools_1.1.0 survival_2.44-1.1
## [103] nnet_7.3-14 tsne_0.1-3 tibble_2.1.3
## [106] crayon_1.3.4 hdf5r_1.3.2.9000 KernSmooth_2.23-15
## [109] rmarkdown_2.5 viridis_0.5.1 grid_3.6.3
## [112] data.table_1.12.6 metap_1.1 digest_0.6.25
## [115] diptest_0.75-7 tidyr_1.0.0 R.utils_2.9.0
## [118] stats4_3.6.3 munsell_0.5.0 viridisLite_0.3.0
## [121] beeswarm_0.2.3 vipor_0.4.5
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩