# Load library
library(Seurat)
library(reticulate)
library(princurve)
library(mgcv)
library(Rtsne)
library(tidyr)
library(dplyr)
library(ggplot2)
library(ggplotify)
library(pheatmap)
library(gridExtra)
library(ggExtra)
library(cowplot)
library(patchwork)
library(RColorBrewer)
library(monocle)
#Set ggplot theme as classic
theme_set(theme_classic())
<- readRDS("./AP.data.RDS") AP.data
# Filter out subpallial progenitors
<- unique(AP.data@ident)
AP.ident <- SubsetData(AP.data,
AP.data ident.use = grep("Sub", AP.ident, value = T, invert = T),
subset.raw = T,
do.clean = F)
# Reset the pseudo-DV score to [0,1]
<- AP.data@meta.data$DorsoVentral.Score
score @meta.data$DorsoVentral.Score <- (score - min(score)) / (max(score) - min(score)) AP.data
<- DimPlot(AP.data,
p1 group.by = "Domaine",
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=F,
label.size = 4,
no.legend = F,
do.return = T,
cols.use = tolower(c("#68B041", "#E3C148", "#B7D174", "#E46B6B")))
<- data.frame(spring1 = AP.data@dr$spring@cell.embeddings[,1],
data spring2 = AP.data@dr$spring@cell.embeddings[,2],
DorsoVentral.Score = AP.data@meta.data$DorsoVentral.Score)
<- colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)
cols <- ggplot(data, aes(spring1, spring2)) +
p2 geom_point(aes(color=DorsoVentral.Score), size=2, shape=16) +
scale_color_gradientn(colours=rev(cols), name='Speudotime score')
+ p2 p1
# Remove non expressed genes
<- Matrix::rowSums(AP.data@data > 0)
num.cells <- names(x = num.cells[which(x = num.cells >= 20)])
genes.use @raw.data <- AP.data@raw.data[genes.use, ]
AP.data@data <- AP.data@data[genes.use, ]
AP.data
# Normalize the counts matrix
<- NormalizeData(object = AP.data,
AP.data normalization.method = "LogNormalize",
scale.factor = round(median(AP.data@meta.data$nUMI)), display.progress = F)
# Scale expression matrix
<- ScaleData(object = AP.data,
AP.data vars.to.regress = c("nUMI", "nGene", "percent.ribo", "percent.mito"),
display.progress = F)
# Find most variable genes
<- FindVariableGenes(object = AP.data,
AP.data mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.0125,
x.high.cutoff = 3,
y.cutoff = 1,
do.plot = F,
display.progress = F)
rm(list=ls()[!ls() %in% c("AP.data")])
# PCA
<- RunPCA(object = AP.data,
AP.data pcs.compute = 10,
do.print = F,
pcs.print = 1,
genes.print = 1,
weight.by.var=T)
<- data.frame(Ccycle = AP.data@meta.data$CC.Difference,
Varaxis Sphase = AP.data@meta.data$S.Score,
Dvaxis = AP.data@meta.data$DorsoVentral.Score)
<- abs(cor(AP.data@dr$pca@cell.embeddings, Varaxis))
PCcor
pheatmap(t(PCcor),
color = colorRampPalette(brewer.pal(n = 9, name = "RdPu"))(100),
cluster_rows = F,
cluster_cols = F,
main = "Pearson's correlation between PCs and characterized axis of variation")
# Exclude PCs having a correlation with characterized axis of variation > 0.25
<- 0.25
cor.th <- seq(1:nrow(PCcor))[colSums(t(PCcor) >= cor.th) == 0]
Selected_PCs Selected_PCs
## [1] 4 5 6 7 8 9 10
<- as.data.frame(AP.data@dr$pca@cell.embeddings[,Selected_PCs])
data
<- principal_curve(as.matrix(data),
fit smoother='lowess',
trace=TRUE,
f = .7,
stretch=0)
## Starting curve---distance^2: 33027138
## Iteration 1---distance^2: 36209.57
## Iteration 2---distance^2: 35442.71
## Iteration 3---distance^2: 35089.61
## Iteration 4---distance^2: 34762.58
## Iteration 5---distance^2: 34502.34
## Iteration 6---distance^2: 34333.51
## Iteration 7---distance^2: 34266.7
## Iteration 8---distance^2: 34206.63
## Iteration 9---distance^2: 34180.74
<- fit$lambda/max(fit$lambda)
RostroCaudal.score @meta.data$RostroCaudal.score <- RostroCaudal.score AP.data
$Domaine <- AP.data@meta.data$Domaine
data
ggplot(data, aes(PC4, PC5)) +
geom_point(aes(color=Domaine), size=3, shape=16) +
geom_line(data=as.data.frame(fit$s[order(fit$lambda),]), color='red', size=0.77) +
scale_color_manual(values=c("#68b041", "#e3c148", "#b7d174", "#e46b6b"))
<- data.frame(Ccycle = AP.data@meta.data$CC.Difference,
Varaxis Sphase = AP.data@meta.data$S.Score,
Dvaxis = AP.data@meta.data$DorsoVentral.Score,
RCaxis = AP.data@meta.data$RostroCaudal.score)
<- abs(cor(AP.data@dr$pca@cell.embeddings, Varaxis))
PCcor
pheatmap(t(PCcor),
color = colorRampPalette(brewer.pal(n = 9, name = "RdPu"))(100),
cluster_rows = F,
cluster_cols = F,
main = "Pearson's correlation between PCs and characterized axis of variation")
<- data.frame(RostroCaudal.Axis = -AP.data@meta.data$RostroCaudal.score,
Coordinates DorsoVentral.Axis = AP.data@meta.data$DorsoVentral.Score)
colnames(Coordinates) <- c("RC.DV.axis1","RC.DV.axis2")
rownames(Coordinates) <- rownames(AP.data@meta.data)
<- SetDimReduction(AP.data,
AP.data reduction.type = "RC.DV.axis",
slot = "cell.embeddings",
new.data = as.matrix(Coordinates))
@dr$RC.DV.axis@key <- "RC.DV.axis"
AP.datacolnames(AP.data@dr$RC.DV.axis@cell.embeddings) <- paste0(GetDimReduction(object = AP.data, reduction.type = "RC.DV.axis",slot = "key"), c(1,2))
DimPlot(AP.data,
group.by = "Domaine",
reduction.use = "RC.DV.axis",
do.label=F,
pt.size = 3,
label.size = 4,
no.legend = F,
do.return = T,
cols.use =c("#68b041", "#e3c148", "#b7d174", "#e46b6b"))
source("./functions/GenesTrendPlots.R")
Plot.Genes.trend(AP.data,
genes = c("Etv5","Pou3f2", "Nr2f2", "Igfbp5"),
Axis = "RostroCaudal.score",
Use.scale.data = F)
<- FeaturePlot(object = AP.data,
plot features.plot = c("Etv5","Pou3f2", "Nr2f2", "Igfbp5"),
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "RC.DV.axis",
no.legend = T,
pt.size = 2,
overlay = F,
dark.theme = F,
do.return =T,
no.axes = T)
for (i in 1:length(plot)){
$data <- plot[[i]]$data[order(plot[[i]]$data$gene),]
plot[[i]] }
::plot_grid(plotlist = plot[1:4], ncol =2) cowplot
# Transfer metadata
<- data.frame(barcode = rownames(AP.data@meta.data),
meta.data Cluster = AP.data@meta.data$old.ident,
RostroCaudal.score = AP.data@meta.data$RostroCaudal.score,
DorsoVentral.Score = AP.data@meta.data$DorsoVentral.Score,
Domaine = AP.data@meta.data$Domaine,
CellcyclePhase = AP.data@meta.data$Phase,
row.names = rownames(AP.data@meta.data))
<- new('AnnotatedDataFrame', data = meta.data)
Annot.data
# Transfer count data
= data.frame(gene_short_name = rownames(AP.data@raw.data),
count.data row.names = rownames(AP.data@raw.data))
<- new('AnnotatedDataFrame', data = count.data)
feature.data
# Create the CellDataSet object
<- newCellDataSet(as.matrix(AP.data@raw.data),
gbm_cds phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 1,
expressionFamily = negbinomial())
<- estimateSizeFactors(gbm_cds)
gbm_cds <- estimateDispersions(gbm_cds)
gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1) gbm_cds
rm(list = ls()[!ls() %in% c("AP.data", "gbm_cds")])
# Exclude cell cycle associated genes
<- as.character(read.table("./Progenitors/CellCycleGenes.csv", sep = "\t", header = F)[,1])
CCgenes <- AP.data@var.genes[!AP.data@var.genes %in% c(CCgenes,"Xist")] Input.genes
<- differentialGeneTest(gbm_cds[Input.genes,],
RC.Axis.genes fullModelFormulaStr = "~sm.ns(RostroCaudal.score, df = 3)*sm.ns(DorsoVentral.Score, df = 3)",
reducedModelFormulaStr = "~sm.ns(DorsoVentral.Score, df = 3)",
cores = detectCores() -2)
# Filter genes with a FDR < 0.0001
<- RC.Axis.genes %>% filter(qval < 1e-4) RC.Axis.genes.FDR.filtered
source("./functions/FunctionsLandscapeSmoothing.R")
<- as.character(RC.Axis.genes.FDR.filtered$gene_short_name)
genes
<- AP.data@dr$RC.DV.axis@cell.embeddings
Sig.gene.expr.data <- as.data.frame(cbind(Sig.gene.expr.data, t(as.matrix(AP.data@data[genes,]))))
Sig.gene.expr.data
<- Gene.smooth.landscape.gam(Sig.gene.expr.data, scale.exp = T) Sig.gene.landscape
<- prcomp(t(Sig.gene.landscape[,3:ncol(Sig.gene.landscape)]))
pcs
<- (pcs$sdev[1:10])^2
eigs <- data.frame(PercentVAr = eigs*100 / sum(eigs),
PercentVAr PCs = 1:length(eigs))
ggplot(PercentVAr,aes(x= as.factor(PCs),y=PercentVAr)) +
geom_point() +
geom_hline(yintercept = 5,linetype = 2, colour="red")
set.seed(1234)
<- as.data.frame(pcs$x[,1:3])
GenePCs $Gene <- rownames(GenePCs)
GenePCs
# Perform Kmeans clustering
<- kmeans(x = GenePCs[,1:3], centers = 5)
kmeans $Cluster <- factor(kmeans$cluster)
GenePCs
ggplot(GenePCs, aes(PC1, -PC2)) +
geom_point(aes(color=Cluster),size=3, shape=16) +
scale_color_manual(values=c("#d14c8d","#9ec22f", "#a9961b", "#cc3a1b", "#4cabdc", "#5ab793")) +
::geom_text_repel((aes(label=ifelse(Gene %in% c("Mpped2","Lhx2", "Etv5","Pax6", "Celf4", "Fgf15","Nr2f1", "Lypd6"), as.character(Gene),''))),
ggrepelbox.padding = 0.2,
point.padding = 0.2,
max.overlaps = 50,
segment.color = 'grey50',
colour = "black")
$PC1 <- GenePCs$PC1
RC.Axis.genes.FDR.filtered$PC2 <- GenePCs$PC2
RC.Axis.genes.FDR.filtered$Cluster <- GenePCs$Cluster RC.Axis.genes.FDR.filtered
<- Gene.smooth.landscape.gam(Sig.gene.expr.data, scale.exp = F)
Genes.landscape <- Genes.landscape[,1:2]
Cluster.means
$Clust.1 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 1]]))
Cluster.means$Clust.2 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 2]]))
Cluster.means$Clust.3 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 3]]))
Cluster.means$Clust.4 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 4]]))
Cluster.means$Clust.5 <- scale(rowMeans(Genes.landscape[,GenePCs$Gene[GenePCs$Cluster == 5]]))
Cluster.means
table(GenePCs$Cluster)
##
## 1 2 3 4 5
## 33 29 30 46 21
Plot.landscape(data = Cluster.means,
genes = paste0("Clust.", 1:5),
col.pal = "BrBG",
ncols = 3)
<- c("Mpped2","Lhx2", "Etv5",
Selected.gene "Pax6", "Nr2f1", "Lypd6")
<- AP.data@dr$RC.DV.axis@cell.embeddings
Selected.gene.expr.data <- as.data.frame(cbind(Selected.gene.expr.data, t(as.matrix(AP.data@data[Selected.gene,]))))
Selected.gene.expr.data
<- Gene.smooth.landscape.gam(Selected.gene.expr.data, scale.exp = T)
Selected.gene.landscape
Plot.landscape(data = Selected.gene.landscape ,
genes = Selected.gene,
col.pal = "RdBu",
ncols = 3)
<- read.table("./Progenitors/Gene.dynamique.csv", sep = ";", header = T)
DVgenes
<- as.character(RC.Axis.genes.FDR.filtered$gene_short_name)
RC.genes <- as.character(DVgenes$Gene)
DV.genes
<- list(RC.genes = RC.genes, DV.genes = DV.genes)
gene.list ::ggvenn(gene.list) ggvenn
$DV_axis_variable <- RC.genes %in% DV.genes
RC.Axis.genes.FDR.filtered
write.table(RC.Axis.genes.FDR.filtered, "./Progenitors/Rostro_Caudal_genes.csv", sep = ";", quote = F)
#date
format(Sys.time(), "%d %B, %Y, %H,%M")
## [1] "03 mai, 2021, 11,49"
#Packages used
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] monocle_2.14.0 DDRTree_0.1.5 irlba_2.3.3
## [4] VGAM_1.1-2 Biobase_2.46.0 BiocGenerics_0.32.0
## [7] RColorBrewer_1.1-2 patchwork_0.0.1 ggExtra_0.9
## [10] gridExtra_2.3 pheatmap_1.0.12 ggplotify_0.0.5
## [13] dplyr_0.8.3 tidyr_1.0.0 Rtsne_0.15
## [16] mgcv_1.8-33 nlme_3.1-141 princurve_2.1.4
## [19] reticulate_1.18 Seurat_2.3.4 Matrix_1.2-17
## [22] cowplot_1.0.0 ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] snow_0.4-3 backports_1.1.5 Hmisc_4.3-0
## [4] plyr_1.8.4 igraph_1.2.5 lazyeval_0.2.2
## [7] densityClust_0.3 fastICA_1.2-2 digest_0.6.25
## [10] foreach_1.4.7 htmltools_0.5.0 viridis_0.5.1
## [13] lars_1.2 gdata_2.18.0 magrittr_1.5
## [16] checkmate_1.9.4 cluster_2.1.0 mixtools_1.1.0
## [19] ROCR_1.0-7 limma_3.42.0 matrixStats_0.55.0
## [22] docopt_0.6.1 R.utils_2.9.0 colorspace_1.4-1
## [25] ggrepel_0.8.1 xfun_0.18 sparsesvd_0.2
## [28] crayon_1.4.0 jsonlite_1.7.0 zeallot_0.1.0
## [31] survival_2.44-1.1 zoo_1.8-6 iterators_1.0.12
## [34] ape_5.3 glue_1.4.1 gtable_0.3.0
## [37] kernlab_0.9-29 prabclus_2.3-1 DEoptimR_1.0-8
## [40] scales_1.1.0 bibtex_0.4.2 miniUI_0.1.1.1
## [43] Rcpp_1.0.5 metap_1.1 dtw_1.21-3
## [46] viridisLite_0.3.0 xtable_1.8-4 htmlTable_1.13.2
## [49] gridGraphics_0.5-1 foreign_0.8-72 bit_4.0.4
## [52] proxy_0.4-23 mclust_5.4.5 SDMTools_1.1-221.1
## [55] Formula_1.2-3 tsne_0.1-3 htmlwidgets_1.5.1
## [58] httr_1.4.1 FNN_1.1.3 gplots_3.0.1.1
## [61] fpc_2.2-3 acepack_1.4.1 modeltools_0.2-22
## [64] ica_1.0-2 farver_2.0.1 pkgconfig_2.0.3
## [67] R.methodsS3_1.7.1 flexmix_2.3-15 nnet_7.3-14
## [70] labeling_0.3 tidyselect_0.2.5 rlang_0.4.7
## [73] reshape2_1.4.3 later_1.0.0 munsell_0.5.0
## [76] tools_3.6.3 ggridges_0.5.1 evaluate_0.14
## [79] stringr_1.4.0 fastmap_1.0.1 yaml_2.2.1
## [82] npsurv_0.4-0 knitr_1.26 bit64_4.0.2
## [85] fitdistrplus_1.0-14 robustbase_0.93-5 caTools_1.17.1.2
## [88] purrr_0.3.3 RANN_2.6.1 pbapply_1.4-2
## [91] mime_0.7 slam_0.1-46 R.oo_1.23.0
## [94] ggvenn_0.1.8 hdf5r_1.3.2.9000 compiler_3.6.3
## [97] rstudioapi_0.11 png_0.1-7 lsei_1.2-0
## [100] tibble_2.1.3 stringi_1.4.6 lattice_0.20-41
## [103] HSMMSingleCell_1.6.0 vctrs_0.2.0 pillar_1.4.2
## [106] lifecycle_0.1.0 BiocManager_1.30.10 combinat_0.0-8
## [109] Rdpack_0.11-0 lmtest_0.9-37 data.table_1.12.6
## [112] bitops_1.0-6 gbRd_0.4-11 httpuv_1.5.2
## [115] R6_2.4.1 latticeExtra_0.6-28 promises_1.1.0
## [118] KernSmooth_2.23-15 codetools_0.2-16 MASS_7.3-53
## [121] gtools_3.8.1 assertthat_0.2.1 withr_2.1.2
## [124] qlcMatrix_0.9.7 diptest_0.75-7 doSNOW_1.0.18
## [127] grid_3.6.3 rpart_4.1-15 class_7.3-17
## [130] rmarkdown_2.5 rvcheck_0.1.7 segmented_1.0-0
## [133] shiny_1.4.0 base64enc_0.1-3
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎