# Load library
library(Seurat)
library(reticulate)
library(princurve)
library(mgcv)
library(Rtsne)
library(tidyr)
library(dplyr)
library(ggplot2)
library(ggplotify)
library(pheatmap)
library(gridExtra)
library(ggExtra)
library(cowplot)
library(patchwork)
library(RColorBrewer)
library(monocle)
#Set ggplot theme as classic
theme_set(theme_classic())AP.data <- readRDS("./AP.data.RDS")# Filter out subpallial progenitors
AP.ident <- unique(AP.data@ident)
AP.data <- SubsetData(AP.data,
ident.use = grep("Sub", AP.ident, value = T, invert = T),
subset.raw = T,
do.clean = F)
# Reset the pseudo-DV score to [0,1]
score <- AP.data@meta.data$DorsoVentral.Score
AP.data@meta.data$DorsoVentral.Score <- (score - min(score)) / (max(score) - min(score))p1 <- DimPlot(AP.data,
group.by = "Domaine",
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=F,
label.size = 4,
no.legend = F,
do.return = T,
cols.use = tolower(c("#68B041", "#E3C148", "#B7D174", "#E46B6B")))
data <- data.frame(spring1 = AP.data@dr$spring@cell.embeddings[,1],
spring2 = AP.data@dr$spring@cell.embeddings[,2],
DorsoVentral.Score = AP.data@meta.data$DorsoVentral.Score)
cols <- colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)
p2 <- ggplot(data, aes(spring1, spring2)) +
geom_point(aes(color=DorsoVentral.Score), size=2, shape=16) +
scale_color_gradientn(colours=rev(cols), name='Speudotime score')
p1 + p2# Remove non expressed genes
num.cells <- Matrix::rowSums(AP.data@data > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 20)])
AP.data@raw.data <- AP.data@raw.data[genes.use, ]
AP.data@data <- AP.data@data[genes.use, ]
# Normalize the counts matrix
AP.data <- NormalizeData(object = AP.data,
normalization.method = "LogNormalize",
scale.factor = round(median(AP.data@meta.data$nUMI)), display.progress = F)
# Scale expression matrix
AP.data <- ScaleData(object = AP.data,
vars.to.regress = c("nUMI", "nGene", "percent.ribo", "percent.mito"),
display.progress = F)
# Find most variable genes
AP.data <- FindVariableGenes(object = AP.data,
mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.0125,
x.high.cutoff = 3,
y.cutoff = 1,
do.plot = F,
display.progress = F)rm(list=ls()[!ls() %in% c("AP.data")])# PCA
AP.data <- RunPCA(object = AP.data,
pcs.compute = 10,
do.print = F,
pcs.print = 1,
genes.print = 1,
weight.by.var=T)Varaxis <- data.frame(Ccycle = AP.data@meta.data$CC.Difference,
Sphase = AP.data@meta.data$S.Score,
Dvaxis = AP.data@meta.data$DorsoVentral.Score)
PCcor <- abs(cor(AP.data@dr$pca@cell.embeddings, Varaxis))
pheatmap(t(PCcor),
color = colorRampPalette(brewer.pal(n = 9, name = "RdPu"))(100),
cluster_rows = F,
cluster_cols = F,
main = "Pearson's correlation between PCs and characterized axis of variation")# Exclude PCs having a correlation with characterized axis of variation > 0.25
cor.th <- 0.25
Selected_PCs <- seq(1:nrow(PCcor))[colSums(t(PCcor) >= cor.th) == 0]
Selected_PCs## [1] 4 5 6 7 8 9 10
data <- as.data.frame(AP.data@dr$pca@cell.embeddings[,Selected_PCs])
fit <- principal_curve(as.matrix(data),
smoother='lowess',
trace=TRUE,
f = .7,
stretch=0)## Starting curve---distance^2: 33027138
## Iteration 1---distance^2: 36209.57
## Iteration 2---distance^2: 35442.71
## Iteration 3---distance^2: 35089.61
## Iteration 4---distance^2: 34762.58
## Iteration 5---distance^2: 34502.34
## Iteration 6---distance^2: 34333.51
## Iteration 7---distance^2: 34266.7
## Iteration 8---distance^2: 34206.63
## Iteration 9---distance^2: 34180.74
RostroCaudal.score <- fit$lambda/max(fit$lambda)
AP.data@meta.data$RostroCaudal.score <- RostroCaudal.scoredata$Domaine <- AP.data@meta.data$Domaine
ggplot(data, aes(PC4, PC5)) +
geom_point(aes(color=Domaine), size=3, shape=16) +
geom_line(data=as.data.frame(fit$s[order(fit$lambda),]), color='red', size=0.77) +
scale_color_manual(values=c("#68b041", "#e3c148", "#b7d174", "#e46b6b"))Varaxis <- data.frame(Ccycle = AP.data@meta.data$CC.Difference,
Sphase = AP.data@meta.data$S.Score,
Dvaxis = AP.data@meta.data$DorsoVentral.Score,
RCaxis = AP.data@meta.data$RostroCaudal.score)
PCcor <- abs(cor(AP.data@dr$pca@cell.embeddings, Varaxis))
pheatmap(t(PCcor),
color = colorRampPalette(brewer.pal(n = 9, name = "RdPu"))(100),
cluster_rows = F,
cluster_cols = F,
main = "Pearson's correlation between PCs and characterized axis of variation")Coordinates <- data.frame(RostroCaudal.Axis = -AP.data@meta.data$RostroCaudal.score,
DorsoVentral.Axis = AP.data@meta.data$DorsoVentral.Score)
colnames(Coordinates) <- c("RC.DV.axis1","RC.DV.axis2")
rownames(Coordinates) <- rownames(AP.data@meta.data)
AP.data <- SetDimReduction(AP.data,
reduction.type = "RC.DV.axis",
slot = "cell.embeddings",
new.data = as.matrix(Coordinates))
AP.data@dr$RC.DV.axis@key <- "RC.DV.axis"
colnames(AP.data@dr$RC.DV.axis@cell.embeddings) <- paste0(GetDimReduction(object = AP.data, reduction.type = "RC.DV.axis",slot = "key"), c(1,2))DimPlot(AP.data,
group.by = "Domaine",
reduction.use = "RC.DV.axis",
do.label=F,
pt.size = 3,
label.size = 4,
no.legend = F,
do.return = T,
cols.use =c("#68b041", "#e3c148", "#b7d174", "#e46b6b"))source("./functions/GenesTrendPlots.R")
Plot.Genes.trend(AP.data,
genes = c("Etv5","Pou3f2", "Nr2f2", "Igfbp5"),
Axis = "RostroCaudal.score",
Use.scale.data = F)plot <- FeaturePlot(object = AP.data,
features.plot = c("Etv5","Pou3f2", "Nr2f2", "Igfbp5"),
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "RC.DV.axis",
no.legend = T,
pt.size = 2,
overlay = F,
dark.theme = F,
do.return =T,
no.axes = T)for (i in 1:length(plot)){
plot[[i]]$data <- plot[[i]]$data[order(plot[[i]]$data$gene),]
}cowplot::plot_grid(plotlist = plot[1:4], ncol =2)# Transfer metadata
meta.data <- data.frame(barcode = rownames(AP.data@meta.data),
Cluster = AP.data@meta.data$old.ident,
RostroCaudal.score = AP.data@meta.data$RostroCaudal.score,
DorsoVentral.Score = AP.data@meta.data$DorsoVentral.Score,
Domaine = AP.data@meta.data$Domaine,
CellcyclePhase = AP.data@meta.data$Phase,
row.names = rownames(AP.data@meta.data))
Annot.data <- new('AnnotatedDataFrame', data = meta.data)
# Transfer count data
count.data = data.frame(gene_short_name = rownames(AP.data@raw.data),
row.names = rownames(AP.data@raw.data))
feature.data <- new('AnnotatedDataFrame', data = count.data)
# Create the CellDataSet object
gbm_cds <- newCellDataSet(as.matrix(AP.data@raw.data),
phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 1,
expressionFamily = negbinomial())gbm_cds <- estimateSizeFactors(gbm_cds)
gbm_cds <- estimateDispersions(gbm_cds)
gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1)rm(list = ls()[!ls() %in% c("AP.data", "gbm_cds")])# Exclude cell cycle associated genes
CCgenes <- as.character(read.table("./Progenitors/CellCycleGenes.csv", sep = "\t", header = F)[,1])
Input.genes <- AP.data@var.genes[!AP.data@var.genes %in% c(CCgenes,"Xist")]RC.Axis.genes <- differentialGeneTest(gbm_cds[Input.genes,],
fullModelFormulaStr = "~sm.ns(RostroCaudal.score, df = 3)*sm.ns(DorsoVentral.Score, df = 3)",
reducedModelFormulaStr = "~sm.ns(DorsoVentral.Score, df = 3)",
cores = detectCores() -2)
# Filter genes with a FDR < 0.0001
RC.Axis.genes.FDR.filtered <- RC.Axis.genes %>% filter(qval < 1e-4)source("./functions/FunctionsLandscapeSmoothing.R")
genes <- as.character(RC.Axis.genes.FDR.filtered$gene_short_name)
Sig.gene.expr.data <- AP.data@dr$RC.DV.axis@cell.embeddings
Sig.gene.expr.data <- as.data.frame(cbind(Sig.gene.expr.data, t(as.matrix(AP.data@data[genes,]))))
Sig.gene.landscape <- Gene.smooth.landscape.gam(Sig.gene.expr.data, scale.exp = T)pcs <- prcomp(t(Sig.gene.landscape[,3:ncol(Sig.gene.landscape)]))
eigs <- (pcs$sdev[1:10])^2
PercentVAr <- data.frame(PercentVAr = eigs*100 / sum(eigs),
PCs = 1:length(eigs))
ggplot(PercentVAr,aes(x= as.factor(PCs),y=PercentVAr)) +
geom_point() +
geom_hline(yintercept = 5,linetype = 2, colour="red")set.seed(1234)
GenePCs <- as.data.frame(pcs$x[,1:3])
GenePCs$Gene <- rownames(GenePCs)
# Perform Kmeans clustering
kmeans <- kmeans(x = GenePCs[,1:3], centers = 5)
GenePCs$Cluster <- factor(kmeans$cluster)
ggplot(GenePCs, aes(PC1, -PC2)) +
geom_point(aes(color=Cluster),size=3, shape=16) +
scale_color_manual(values=c("#d14c8d","#9ec22f", "#a9961b", "#cc3a1b", "#4cabdc", "#5ab793")) +
ggrepel::geom_text_repel((aes(label=ifelse(Gene %in% c("Mpped2","Lhx2", "Etv5","Pax6", "Celf4", "Fgf15","Nr2f1", "Lypd6"), as.character(Gene),''))),
box.padding = 0.2,
point.padding = 0.2,
max.overlaps = 50,
segment.color = 'grey50',
colour = "black")RC.Axis.genes.FDR.filtered$PC1 <- GenePCs$PC1
RC.Axis.genes.FDR.filtered$PC2 <- GenePCs$PC2
RC.Axis.genes.FDR.filtered$Cluster <- GenePCs$ClusterGenes.landscape <- Gene.smooth.landscape.gam(Sig.gene.expr.data, scale.exp = F)
Cluster.means <- Genes.landscape[,1:2]
Cluster.means$Clust.1 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 1]]))
Cluster.means$Clust.2 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 2]]))
Cluster.means$Clust.3 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 3]]))
Cluster.means$Clust.4 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 4]]))
Cluster.means$Clust.5 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 5]]))
table(GenePCs$Cluster)##
## 1 2 3 4 5
## 33 29 30 46 21
Plot.landscape(data = Cluster.means,
genes = paste0("Clust.", 1:5),
col.pal = "BrBG",
ncols = 3)Selected.gene <- c("Mpped2","Lhx2", "Etv5",
"Pax6", "Nr2f1", "Lypd6")
Selected.gene.expr.data <- AP.data@dr$RC.DV.axis@cell.embeddings
Selected.gene.expr.data <- as.data.frame(cbind(Selected.gene.expr.data, t(as.matrix(AP.data@data[Selected.gene,]))))
Selected.gene.landscape <- Gene.smooth.landscape.gam(Selected.gene.expr.data, scale.exp = T)
Plot.landscape(data = Selected.gene.landscape ,
genes = Selected.gene,
col.pal = "RdBu",
ncols = 3)DVgenes <- read.table("./Progenitors/Gene.dynamique.csv", sep = ";", header = T)
RC.genes <- as.character(RC.Axis.genes.FDR.filtered$gene_short_name)
DV.genes <- as.character(DVgenes$Gene)
gene.list <- list(RC.genes = RC.genes, DV.genes = DV.genes)
ggvenn::ggvenn(gene.list)RC.Axis.genes.FDR.filtered$DV_axis_variable <- RC.genes %in% DV.genes
write.table(RC.Axis.genes.FDR.filtered, "./Progenitors/Rostro_Caudal_genes.csv", sep = ";", quote = F)#date
format(Sys.time(), "%d %B, %Y, %H,%M")## [1] "03 mai, 2021, 11,49"
#Packages used
sessionInfo()## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] monocle_2.14.0 DDRTree_0.1.5 irlba_2.3.3
## [4] VGAM_1.1-2 Biobase_2.46.0 BiocGenerics_0.32.0
## [7] RColorBrewer_1.1-2 patchwork_0.0.1 ggExtra_0.9
## [10] gridExtra_2.3 pheatmap_1.0.12 ggplotify_0.0.5
## [13] dplyr_0.8.3 tidyr_1.0.0 Rtsne_0.15
## [16] mgcv_1.8-33 nlme_3.1-141 princurve_2.1.4
## [19] reticulate_1.18 Seurat_2.3.4 Matrix_1.2-17
## [22] cowplot_1.0.0 ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] snow_0.4-3 backports_1.1.5 Hmisc_4.3-0
## [4] plyr_1.8.4 igraph_1.2.5 lazyeval_0.2.2
## [7] densityClust_0.3 fastICA_1.2-2 digest_0.6.25
## [10] foreach_1.4.7 htmltools_0.5.0 viridis_0.5.1
## [13] lars_1.2 gdata_2.18.0 magrittr_1.5
## [16] checkmate_1.9.4 cluster_2.1.0 mixtools_1.1.0
## [19] ROCR_1.0-7 limma_3.42.0 matrixStats_0.55.0
## [22] docopt_0.6.1 R.utils_2.9.0 colorspace_1.4-1
## [25] ggrepel_0.8.1 xfun_0.18 sparsesvd_0.2
## [28] crayon_1.4.0 jsonlite_1.7.0 zeallot_0.1.0
## [31] survival_2.44-1.1 zoo_1.8-6 iterators_1.0.12
## [34] ape_5.3 glue_1.4.1 gtable_0.3.0
## [37] kernlab_0.9-29 prabclus_2.3-1 DEoptimR_1.0-8
## [40] scales_1.1.0 bibtex_0.4.2 miniUI_0.1.1.1
## [43] Rcpp_1.0.5 metap_1.1 dtw_1.21-3
## [46] viridisLite_0.3.0 xtable_1.8-4 htmlTable_1.13.2
## [49] gridGraphics_0.5-1 foreign_0.8-72 bit_4.0.4
## [52] proxy_0.4-23 mclust_5.4.5 SDMTools_1.1-221.1
## [55] Formula_1.2-3 tsne_0.1-3 htmlwidgets_1.5.1
## [58] httr_1.4.1 FNN_1.1.3 gplots_3.0.1.1
## [61] fpc_2.2-3 acepack_1.4.1 modeltools_0.2-22
## [64] ica_1.0-2 farver_2.0.1 pkgconfig_2.0.3
## [67] R.methodsS3_1.7.1 flexmix_2.3-15 nnet_7.3-14
## [70] labeling_0.3 tidyselect_0.2.5 rlang_0.4.7
## [73] reshape2_1.4.3 later_1.0.0 munsell_0.5.0
## [76] tools_3.6.3 ggridges_0.5.1 evaluate_0.14
## [79] stringr_1.4.0 fastmap_1.0.1 yaml_2.2.1
## [82] npsurv_0.4-0 knitr_1.26 bit64_4.0.2
## [85] fitdistrplus_1.0-14 robustbase_0.93-5 caTools_1.17.1.2
## [88] purrr_0.3.3 RANN_2.6.1 pbapply_1.4-2
## [91] mime_0.7 slam_0.1-46 R.oo_1.23.0
## [94] ggvenn_0.1.8 hdf5r_1.3.2.9000 compiler_3.6.3
## [97] rstudioapi_0.11 png_0.1-7 lsei_1.2-0
## [100] tibble_2.1.3 stringi_1.4.6 lattice_0.20-41
## [103] HSMMSingleCell_1.6.0 vctrs_0.2.0 pillar_1.4.2
## [106] lifecycle_0.1.0 BiocManager_1.30.10 combinat_0.0-8
## [109] Rdpack_0.11-0 lmtest_0.9-37 data.table_1.12.6
## [112] bitops_1.0-6 gbRd_0.4-11 httpuv_1.5.2
## [115] R6_2.4.1 latticeExtra_0.6-28 promises_1.1.0
## [118] KernSmooth_2.23-15 codetools_0.2-16 MASS_7.3-53
## [121] gtools_3.8.1 assertthat_0.2.1 withr_2.1.2
## [124] qlcMatrix_0.9.7 diptest_0.75-7 doSNOW_1.0.18
## [127] grid_3.6.3 rpart_4.1-15 class_7.3-17
## [130] rmarkdown_2.5 rvcheck_0.1.7 segmented_1.0-0
## [133] shiny_1.4.0 base64enc_0.1-3
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎