#load library
library(Seurat)
library(scrattch.hicat)
library(scrattch.vis)
library(RColorBrewer)
library(dendextend)
library(dplyr)
library(matrixStats)
library(Matrix)
#Set ggplot theme
theme_set(theme_classic())# Extract most mature pallial neuron clusters
glut.clusters <- paste0( c("LN.Glut."),c("13", "1", "14", "22", "16", "19", "24", "26", "20", "21"))
# Extract pallial neuron clusters
Glut.LN.data <- SubsetData(Allcells.data, ident.use = glut.clusters, subset.raw = T,  do.clean = F)
rm(list = ls()[!ls() %in% c("Glut.LN.data")])colors <- c("#9ec22f", "#ebcb2e", "#a9961b", "#e7823a", "#cc3a1b", "#d14c8d", "#046c9a", "#4990c9", "#4cabdc", "#5ab793")
# Prepare annotation for hicat pipeline
colorsident <- cbind(ident = unique(as.character(Glut.LN.data@ident)),
                     colors = colors,
                     id = unique(as.character(Glut.LN.data@ident)))
# Create annotation data.frame
anno.df <- as.data.frame(cbind(
  sample_name = row.names(Glut.LN.data@meta.data),
  primary_type_id = colorsident[match(as.character(Glut.LN.data@ident), colorsident[,1]),3],
  primary_type_label = as.character(Glut.LN.data@ident),
  primary_type_color = colorsident[match(as.character(Glut.LN.data@ident), colorsident[,1]),2]
))
# Make a data.frame of unique cluster id, type, color, and broad type
cl.df <- anno.df %>%
  select(primary_type_id, 
         primary_type_label, 
         primary_type_color) %>%
  unique()
colnames(cl.df)[1:3] <- c("cluster_id", "cluster_label", "cluster_color")
# Sort by cluster_id
cl.df <- arrange(cl.df, cluster_id)
row.names(cl.df) <- cl.df$cluster_id
cl.fact <- setNames(factor(anno.df$primary_type_id), anno.df$sample_name)# Filter genes
num.cells <- Matrix::rowSums(Glut.LN.data@data > 0) 
genes.use <- names(x = num.cells[which(x = num.cells >= 10)]) 
GenesToRemove <- c(grep(pattern = "(^Rpl|^Rps|^Mrp)", x = genes.use, value = TRUE), grep(pattern = "^mt-", x = genes.use, value = TRUE), "Xist")
genes.use <- genes.use[!genes.use %in% GenesToRemove] ; rm(GenesToRemove, num.cells)
Glut.LN.data@raw.data <- Glut.LN.data@raw.data[genes.use, ]
Glut.LN.data <- NormalizeData(object = Glut.LN.data,
                                      normalization.method = "LogNormalize", 
                                      scale.factor = round(median(Glut.LN.data@meta.data$nUMI)),
                                      display.progress = F)
# Find all var genes to retrict analysis
Glut.LN.data <- FindVariableGenes(object = Glut.LN.data,
                                          mean.function = ExpMean,
                                          dispersion.function = LogVMR,
                                          x.low.cutoff = 0.02,
                                          x.high.cutoff = 3,
                                          y.cutoff = 1, 
                                          do.plot = F, display.progress = F)
dgeMatrix_count <- as.matrix(Glut.LN.data@raw.data)[rownames(Glut.LN.data@raw.data) %in% Glut.LN.data@var.genes,]
dgeMatrix_cpm <- cpm(dgeMatrix_count) 
norm.dat <- log2(dgeMatrix_cpm + 1) ; rm(dgeMatrix_cpm)
Data.matrix <- list(raw.dat=dgeMatrix_count, norm.dat=norm.dat) ; attach(Data.matrix)
rm(dgeMatrix_count,norm.dat)Scrattch.hicat perform hierarchical clustering on a cluster correlation matrix based on median expression values for the top 50 most DEGs between every pair of clusters. It estimates branch confidence level using a bootstrap approach implemented by the pvclust package
# Build the dendrogram
dend.result <- build_dend(cl.med[,levels(cl.fact)],
                          l.color= setNames(as.character(cl.df$cluster_color), row.names(cl.df)),
                          nboot = 100)## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.8)... Done.
## Bootstrap (r = 0.9)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.3)... Done.
## Bootstrap (r = 1.4)... Done.
# Attach cluster labels
dend.labeled <- dend.result$dend
labels(dend.labeled) <- cl.df[labels(dend.labeled), "cluster_label"]
plot(dend.labeled) Fig. S3A
Glut.LN.data@cluster.tree[[1]] <- ape::as.phylo(dend.labeled)
labels(Glut.LN.data@cluster.tree[[1]]) <- cl.df[as.numeric(labels(Glut.LN.data@cluster.tree[[1]])), "cluster_label"]
Glut.LN.data@ident <- factor(Glut.LN.data@ident, levels = paste0( c("LN.Glut."),c("13", "1", "14", "22", "16", "19", "24", "26", "20", "21")))# For each nodes, find for daughter branches the 10 genes which define them
Branch.markers.list <- Find.nodes.markers(Glut.LN.data, 10)# Generate a dot plot for each nodes
plot.list <- Node.markers.plots(Glut.LN.data,
                                Branch.markers.list)## [[1]]
Fig. S3B
## [[1]]
Fig. S3C
## [[1]]
Fig. S3D
## [[1]]
Fig. S3E
## [[1]]
Fig. S3F
## [[1]]
Fig. S3G
## [[1]]
Fig. S3H
## [[1]]
Fig. S3I
## [[1]]
Fig. S3J
## [1] "30 novembre, 2020, 11,37"
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=fr_FR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=fr_FR.UTF-8    
##  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=fr_FR.UTF-8   
##  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] pvclust_2.2-0         matrixStats_0.55.0    dendextend_1.12.0    
##  [4] RColorBrewer_1.1-2    scrattch.vis_0.0.210  purrr_0.3.3          
##  [7] ggbeeswarm_0.6.0      dplyr_0.8.3           scrattch.hicat_0.0.16
## [10] Seurat_2.3.4          Matrix_1.2-17         cowplot_1.0.0        
## [13] ggplot2_3.2.1        
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15          colorspace_1.4-1    ellipsis_0.3.0     
##   [4] class_7.3-17        modeltools_0.2-22   ggridges_0.5.1     
##   [7] mclust_5.4.5        htmlTable_1.13.2    base64enc_0.1-3    
##  [10] rstudioapi_0.11     proxy_0.4-23        farver_2.0.1       
##  [13] npsurv_0.4-0        flexmix_2.3-15      bit64_4.0.2        
##  [16] codetools_0.2-16    splines_3.6.3       R.methodsS3_1.7.1  
##  [19] lsei_1.2-0          robustbase_0.93-5   knitr_1.26         
##  [22] zeallot_0.1.0       jsonlite_1.7.0      Formula_1.2-3      
##  [25] ica_1.0-2           cluster_2.1.0       kernlab_0.9-29     
##  [28] png_0.1-7           R.oo_1.23.0         compiler_3.6.3     
##  [31] httr_1.4.1          backports_1.1.5     assertthat_0.2.1   
##  [34] lazyeval_0.2.2      lars_1.2            acepack_1.4.1      
##  [37] htmltools_0.5.0     tools_3.6.3         igraph_1.2.5       
##  [40] gtable_0.3.0        glue_1.4.1          RANN_2.6.1         
##  [43] reshape2_1.4.3      Rcpp_1.0.5          vctrs_0.2.0        
##  [46] gdata_2.18.0        ape_5.3             nlme_3.1-141       
##  [49] iterators_1.0.12    fpc_2.2-3           gbRd_0.4-11        
##  [52] lmtest_0.9-37       xfun_0.18           stringr_1.4.0      
##  [55] lifecycle_0.1.0     irlba_2.3.3         gtools_3.8.1       
##  [58] DEoptimR_1.0-8      MASS_7.3-53         zoo_1.8-6          
##  [61] scales_1.1.0        doSNOW_1.0.18       parallel_3.6.3     
##  [64] yaml_2.2.1          reticulate_1.13     pbapply_1.4-2      
##  [67] gridExtra_2.3       rpart_4.1-15        segmented_1.0-0    
##  [70] latticeExtra_0.6-28 stringi_1.4.6       highr_0.8          
##  [73] foreach_1.4.7       checkmate_1.9.4     caTools_1.17.1.2   
##  [76] bibtex_0.4.2        Rdpack_0.11-0       SDMTools_1.1-221.1 
##  [79] rlang_0.4.7         pkgconfig_2.0.3     dtw_1.21-3         
##  [82] prabclus_2.3-1      bitops_1.0-6        evaluate_0.14      
##  [85] lattice_0.20-41     ROCR_1.0-7          labeling_0.3       
##  [88] htmlwidgets_1.5.1   bit_4.0.4           tidyselect_0.2.5   
##  [91] plyr_1.8.4          magrittr_1.5        R6_2.4.1           
##  [94] snow_0.4-3          gplots_3.0.1.1      Hmisc_4.3-0        
##  [97] pillar_1.4.2        foreign_0.8-72      withr_2.1.2        
## [100] fitdistrplus_1.0-14 mixtools_1.1.0      survival_2.44-1.1  
## [103] nnet_7.3-14         tsne_0.1-3          tibble_2.1.3       
## [106] crayon_1.3.4        hdf5r_1.3.2.9000    KernSmooth_2.23-15 
## [109] rmarkdown_2.5       viridis_0.5.1       grid_3.6.3         
## [112] data.table_1.12.6   metap_1.1           digest_0.6.25      
## [115] diptest_0.75-7      tidyr_1.0.0         R.utils_2.9.0      
## [118] stats4_3.6.3        munsell_0.5.0       viridisLite_0.3.0  
## [121] beeswarm_0.2.3      vipor_0.4.5
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩