We extract the most mature pallial cells in two steps :
# Cell state scores used to select the pallial branch.
# Plots the AP, SP and Pal signature scores on the "spring" embedding,
# one panel per score, with a reversed "Spectral" colour ramp.
FeaturePlot(object = QCFiltered.data,
            features.plot = c("AP_signature1", "SP_signature1", "Pal_signature1"),
            cols.use = rev(brewer.pal(10, "Spectral")),
            reduction.use = "spring",
            no.legend = TRUE,
            overlay = FALSE,
            dark.theme = FALSE)
We perform K-means clustering on the 3 cell state scores :
# K-means clustering (k = 3) based on AP, SP and Pal signature scores.
# The SP score is included so the code matches the stated three-score input.
# NOTE(review): kmeans() starts from random centres; call set.seed() upstream
# if reproducible cluster numbering is required.
cl <- kmeans(cbind(QCFiltered.data@meta.data$AP_signature1,
                   QCFiltered.data@meta.data$SP_signature1,
                   QCFiltered.data@meta.data$Pal_signature1), 3)
QCFiltered.data@meta.data$kmeanClust <- paste0("Clust.", cl$cluster)

col.pal <- wes_palette("GrandBudapest1", 3, type = "discrete")

# Build the scatter plot first: ggMarginal() takes the finished plot as its
# argument and must not be chained onto the plot with `+`.
p1 <- ggplot(QCFiltered.data@meta.data,
             aes(x = Pal_signature1, y = SP_signature1, colour = kmeanClust)) +
  scale_color_manual(values = col.pal) +
  geom_point()

ggMarginal(p1, type = "histogram", fill = "lightgrey")

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = QCFiltered.data,
        group.by = "kmeanClust",
        reduction.use = "spring",
        cols.use = col.pal,
        dim.1 = 1,
        dim.2 = 2,
        label.size = 4,
        no.legend = TRUE)
We then extract the glutamatergic neuron branch as the k-means cluster with the highest mean pallial neuron (Pal) signature score
# Mean Pal_signature1 score of each k-means cluster
MeanKclust.Palscore <- aggregate(
  Pal_signature1 ~ kmeanClust,
  data = QCFiltered.data@meta.data,
  FUN = mean
)

# Label of the cluster whose mean score is the maximum
Palclust <- MeanKclust.Palscore %>%
  filter(Pal_signature1 == max(Pal_signature1)) %>%
  pull(kmeanClust)

# Barcodes of the cells assigned to that cluster
Glut.cells <- QCFiltered.data@meta.data %>%
  filter(kmeanClust == Palclust) %>%
  pull(Barcodes)

# Restrict the Seurat object (raw data included) to those cells
Glut.QCFiltered.data <- SubsetData(
  QCFiltered.data,
  cells.use = Glut.cells,
  subset.raw = TRUE,
  do.clean = FALSE
)
# K-means clustering (k = 2) based on EN and LN signature scores
cl <- kmeans(cbind(Glut.QCFiltered.data@meta.data$EN_signature1,
                   Glut.QCFiltered.data@meta.data$LN_signature1), 2)
Glut.QCFiltered.data@meta.data$kmeanClust <- paste0("Clust.", cl$cluster)

# Find the k-means cluster with the highest mean LN_signature1 score
MeanKclust.LNscore <- aggregate(LN_signature1 ~ kmeanClust, Glut.QCFiltered.data@meta.data, mean)
LN.clust <- MeanKclust.LNscore %>%
  filter(LN_signature1 == max(LN_signature1)) %>%
  pull(kmeanClust)

# Extract barcodes of the cells in that cluster
LN.cells <- Glut.QCFiltered.data@meta.data %>%
  filter(kmeanClust == LN.clust) %>%
  pull(Barcodes)

col.pal <- wes_palette("GrandBudapest1", 2, type = "discrete")

# Finish the scatter plot before handing it to ggMarginal(); chaining
# ggMarginal(p1, ...) with `+` would reference p1 inside its own definition.
p1 <- ggplot(Glut.QCFiltered.data@meta.data,
             aes(x = LN_signature1, y = EN_signature1, colour = kmeanClust)) +
  annotate(geom = "text", label = paste0(length(LN.cells), " Late Neurons"), x = 1, y = 1.5) +
  scale_color_manual(values = col.pal) +
  geom_point()

ggMarginal(p1, type = "histogram", fill = "lightgrey")
rm(p1)

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = Glut.QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = Glut.QCFiltered.data,
        group.by = "kmeanClust",
        reduction.use = "spring",
        cols.use = col.pal,
        dim.1 = 1,
        dim.2 = 2,
        label.size = 4,
        no.legend = TRUE)
We then extract the glutamatergic neurons belonging to the k-means cluster with the highest mean late neuron (LN) signature score
# Filter the seurat object: keep only the late-neuron cells selected above
Glut.QCFiltered.data <- SubsetData(Glut.QCFiltered.data,
                                   cells.use = LN.cells,
                                   subset.raw = TRUE,
                                   do.clean = FALSE)

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = Glut.QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = Glut.QCFiltered.data,
        group.by = "kmeanClust",
        reduction.use = "spring",
        cols.use = "#969696",
        dim.1 = 1,
        dim.2 = 2,
        label.size = 4,
        no.legend = TRUE)
For more details on scrattch.hicat, please refer to the package page.
We decide to exclude Cell cycle, ribosomal and mitochondrial associated genes, as well as Xist for the clustering step.
# Exclude cell cycle associated genes.
# These vectors are matched exactly (via %in%) against gene names further down,
# so every symbol must be free of padding and use the capitalised mouse
# convention. Fixed relative to the previous lists:
#   - stray leading spaces removed (" Casp8ap2", " Cenpk")
#   - capitalisation fixed ("kif20b" -> "Kif20b", "kif2c" -> "Kif2c")
#   - misspelt symbols fixed ("Tym5" -> "Tyms", "Rap2" -> "Rpa2", "Ddk1" -> "Cdk1")
# NOTE(review): Seurat's reference G2/M list has CENPF where this list has
# "Cenpk"; kept as written -- confirm which gene was intended.
s.genes <- c("Mcm5", "Pcna", "Tyms", "Fen1", "Mcm2", "Mcm4", "Rrm1", "Ung",
             "Gins2", "Mcm6", "Cdca7", "Dtl", "Prim1", "Uhrf1", "Mlf1ip",
             "Hells", "Rfc2", "Rpa2", "Nasp", "Rad51ap1", "Gmnn", "Wdr76",
             "Slbp", "Ccne2", "Ubr7", "Pold3", "Msh2", "Atad2", "Rad51",
             "Rrm2", "Cdc45", "Cdc6", "Exo1", "Tipin", "Dscc1", "Blm",
             "Casp8ap2", "Usp1", "Clspn", "Pola1", "Chaf1b", "Brip1", "E2f8")
g2m.genes <- c("Hmgb2", "Cdk1", "Nusap1", "Ube2c", "Birc5", "Tpx2", "Top2a",
               "Ndc80", "Cks2", "Nuf2", "Cks1b", "Mki67", "Tmpo", "Cenpk",
               "Tacc3", "Fam64a", "Smc4", "Ccnb2", "Ckap2l", "Ckap2", "Aurkb",
               "Bub1", "Kif11", "Anp32e", "Tubb4b", "Gtse1", "Kif20b", "Hjurp",
               "Cdca3", "Hn1", "Cdc20", "Ttk", "Cdc25c", "Kif2c", "Rangap1",
               "Ncapd2", "Dlgap5", "Cdca2", "Cdca8", "Ect2", "Kif23", "Hmmr",
               "Aurka", "Psrc1", "Anln", "Lbr", "Ckap5", "Cenpe", "Ctcf",
               "Nek2", "G2e3", "Gas2l3", "Cbx5", "Cenpa")
# Exclude genes detected in less than 3 cells
num.cells <- Matrix::rowSums(Glut.QCFiltered.data@data > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 3)])

# Drop ribosomal (Rpl/Rps/Mrp), mitochondrial (mt-), Xist and cell cycle genes
GenesToRemove <- c(grep(pattern = "(^Rpl|^Rps|^Mrp)", x = genes.use, value = TRUE),
                   grep(pattern = "^mt-", x = genes.use, value = TRUE),
                   "Xist", s.genes, g2m.genes)
genes.use <- genes.use[!genes.use %in% GenesToRemove]

# Raw counts restricted to the retained genes, then log2(CPM + 1)
dgeMatrix_count <- as.matrix(Glut.QCFiltered.data@raw.data)[rownames(Glut.QCFiltered.data@raw.data) %in% genes.use, ]
dgeMatrix_cpm <- cpm(dgeMatrix_count)
norm.dat <- log2(dgeMatrix_cpm + 1)

# Build Data.matrix from the matrices computed just above so the covariates
# below cannot silently use a stale object, and expose raw.dat by explicit
# assignment (no attach()) for the later iter_clust(counts = raw.dat) call.
Data.matrix <- list(raw.dat = dgeMatrix_count, norm.dat = norm.dat)
raw.dat <- Data.matrix$raw.dat

# Per-cell technical covariates passed to iter_clust() as `rm.eigen`
gene.counts <- log2(colSums(as.matrix(Data.matrix$norm.dat) > 0))
nUMI <- log2(colSums(Data.matrix$raw.dat))
# NOTE(review): assumes meta.data rows are in the same cell order as the
# columns of the count matrix -- confirm.
perctMito <- Glut.QCFiltered.data@meta.data$percent.mito
perctRibo <- Glut.QCFiltered.data@meta.data$percent.ribo

# NOTE(review): the arguments of cbind() and the column names were truncated
# in the source; reconstructed from the four covariates defined above (the
# same four that are removed below). The column names after "log2nGenes" are
# reconstructed -- confirm.
rm.eigen <- as.matrix(cbind(gene.counts,
                            nUMI,
                            perctMito,
                            perctRibo))
row.names(rm.eigen) <- names(gene.counts)
colnames(rm.eigen) <- c("log2nGenes",
                        "log2nUMI",
                        "perctMito",
                        "perctRibo")
rm(gene.counts, nUMI, perctMito, perctRibo)
# Parameters for iterative clustering
# Differential-expression criteria object; it is passed as `de.param` to
# iter_clust() below.
# NOTE(review): the exact meaning of each threshold is defined by
# scrattch.hicat::de_param -- see the package documentation before tuning.
de.param <- de_param(padj.th = 0.01,
lfc.th = 0.9,
low.th = 1,
q1.th = 0.25,
q2.th = NULL,
q.diff.th = 0.7,
de.score.th = 30,
min.cells = 10)
The default iter_clust function used in this version of the scrattch.hicat package does not allow the k.param argument to be set. We modified this function so that this argument can be set to other values.
# Perform the iterative clustering
# Inputs: the log2(CPM + 1) matrix `norm.dat`, the matching raw counts
# `raw.dat`, the DE criteria `de.param` and the covariate matrix `rm.eigen`.
# `raw.dat` is expected to hold the raw count matrix matching `norm.dat`.
iter.result <- iter_clust(norm.dat,
counts = raw.dat,
dim.method = "pca",
max.dim = 15,
de.param = de.param,
type = "undirectional",
rm.eigen = rm.eigen,
# k.param is only accepted by the locally modified iter_clust()
k.param = 8,
rm.th = 0.7,
vg.padj.th = 0.5,
method = "louvain",
prefix = "test-iter_clust",
verbose = F)
# Label each cell "Core.Cells" when its mapping probability exceeds 0.95,
# otherwise "Intermediate".
# NOTE(review): `map.results` is not defined in the visible code; this assumes
# map.results$map.df rows are in the same cell order as meta.data -- confirm.
Glut.QCFiltered.data@meta.data$Core_cells <- ifelse(map.results$map.df$prob > 0.95,
                                                    "Core.Cells",
                                                    "Intermediate")

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = Glut.QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = Glut.QCFiltered.data,
        group.by = "Core_cells",
        reduction.use = "spring",
        cols.use = wes_palette("Royal1", 2, type = "discrete"),
        dim.1 = 1,
        dim.2 = 2,
        label.size = 2,
        no.legend = FALSE)
# Remove intermediate cells: keep only barcodes flagged "Core.Cells"
Core.cells <- Glut.QCFiltered.data@meta.data %>%
  dplyr::filter(Core_cells == "Core.Cells") %>%
  pull(Barcodes)
Glut.QCFiltered.data <- SubsetData(Glut.QCFiltered.data,
                                   cells.use = Core.cells,
                                   subset.raw = TRUE,
                                   do.clean = FALSE)

# Keep the annotation table in sync with the retained cells.
# dplyr::filter is namespace-qualified as above so that stats::filter cannot
# be picked up by mistake.
anno.df <- anno.df %>% dplyr::filter(sample_name %in% Core.cells)

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = Glut.QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = Glut.QCFiltered.data,
        reduction.use = "spring",
        dim.1 = 1,
        dim.2 = 2,
        label.size = 3,
        no.legend = TRUE,
        cols.use = colors)
Manuscript element Fig. 2C
# Expression table keyed by `sample_name` (cell barcodes of the Seurat object).
# NOTE(review): this cbind() call is truncated in the source -- its remaining
# arguments and closing parenthesis are missing and must be restored.
data <- cbind(sample_name = colnames(Glut.QCFiltered.data@data),
# Marker genes displayed in the figure, in plotting order
Selected.markers <- c("Slc17a6","Tbr1", "Gap43", "Foxg1","Mn1","Reln",
"Lhx5","Calb2","Samd3", "Dmrta2", "Nr2f2", "Ebf2",
"Gm27199", "Chl1","Unc5c", "Grm1", "Insm2","Trp73",
"Ebf3", "Cdkn1a", "Lhx1", "Lhx1os","Zfp503", "Cacna2d2",
"Serpine2", "Rspo3", "Grm2", "Cxcr4","Lamp5","Neurod2",
"Neurod6", "Tcf4","Lrfn5", "Fezf1", "Fezf2", "Pcp4",
"Nfia", "Nfib", "Nfix", "Bhlhe22","Ppp1r14c", "Scg2",
"Meis2","Pbx3", "Mab21l1", "Barhl2", "Runx1t1","Mef2c",
"Nxph1", "Meis1", "Tshz2","Lhx9", "Nr4a2", "Cck", "Foxp2",
"Pou3f2", "Sox6","Ebf1","Nrip3", "Zic1", "Etv1", "Tfap2e", "Pax6")
# NOTE(review): the lines below are the trailing arguments of a plotting call
# whose opening line (function name and first arguments) is missing from the
# source; presumably a grouped expression plot using `data` -- confirm.
genes = Selected.markers,
group_order = levels(cl.fact),
grouping = "primary_type",
log_scale = FALSE,
font_size = 7,
label_height = 10,
label_type = "angle",
bg_color ="#f7f7f7")
Manuscript Fig. 2D
Load QC filtered dataset
As for the glutamatergic neurons, we extract the most mature sub-pallial cells in two steps :
# Cell state scores used to select the sub-pallial branch.
# Plots the AP, Pal and SP signature scores on the "spring" embedding,
# one panel per score, with a reversed "Spectral" colour ramp.
FeaturePlot(object = QCFiltered.data,
            features.plot = c("AP_signature1", "Pal_signature1", "SP_signature1"),
            cols.use = rev(brewer.pal(10, "Spectral")),
            reduction.use = "spring",
            no.legend = TRUE,
            overlay = FALSE,
            dark.theme = FALSE)
We perform K-means clustering on the 3 cell state scores :
# K-means clustering (k = 3) based on AP, SP and Pal signature scores.
# NOTE(review): the cbind() arguments after AP_signature1 were truncated in
# the source; SP and Pal are restored to match the stated three-score input.
# NOTE(review): kmeans() starts from random centres; call set.seed() upstream
# if reproducible cluster numbering is required.
cl <- kmeans(x = cbind(QCFiltered.data@meta.data$AP_signature1,
                       QCFiltered.data@meta.data$SP_signature1,
                       QCFiltered.data@meta.data$Pal_signature1),
             centers = 3)
QCFiltered.data@meta.data$kmeanClust <- paste0("Clust.", cl$cluster)

col.pal <- wes_palette("GrandBudapest1", 3, type = "discrete")

# Build the scatter plot first: ggMarginal() takes the finished plot as its
# argument and must not be chained onto the plot with `+`.
p1 <- ggplot(QCFiltered.data@meta.data,
             aes(x = Pal_signature1, y = SP_signature1, colour = kmeanClust)) +
  scale_color_manual(values = col.pal) +
  geom_point()

ggMarginal(p1, type = "histogram", fill = "lightgrey")

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = QCFiltered.data,
        group.by = "kmeanClust",
        reduction.use = "spring",
        cols.use = col.pal,
        dim.1 = 1,
        dim.2 = 2,
        label.size = 4,
        no.legend = TRUE)
We then extract the GABAergic neuron branch as the k-means cluster with the highest mean sub-pallial neuron (SP) signature score
# Mean SP_signature1 score of each k-means cluster
MeanKclust.SPscore <- aggregate(
  SP_signature1 ~ kmeanClust,
  data = QCFiltered.data@meta.data,
  FUN = mean
)

# Label of the cluster whose mean score is the maximum
SPclust <- MeanKclust.SPscore %>%
  filter(SP_signature1 == max(SP_signature1)) %>%
  pull(kmeanClust)

# Barcodes of the cells assigned to that cluster
SP.cells <- QCFiltered.data@meta.data %>%
  filter(kmeanClust == SPclust) %>%
  pull(Barcodes)

# Restrict the Seurat object (raw data included) to those cells
SP.QCFiltered.data <- SubsetData(
  QCFiltered.data,
  cells.use = SP.cells,
  subset.raw = TRUE,
  do.clean = FALSE
)
# Calculate SP early neurons signature score based on selected marker genes
# NOTE(review): the vector is named `LNgenes` although it feeds the
# "EN_signature" score -- the name looks like a leftover; confirm before reuse.
LNgenes <- c("Dlx6os1", "Nudt4", "Abracl", "Arl4d", "Tmem123", "Ccdc109b", "Hmgn2", "E130006D01Rik", "Cdca7")
# One gene set is scored; the score is read back below as `EN_signature1`.
genes.list <- list(LNgenes)
enrich.name <- "EN_signature"
SP.QCFiltered.data <- AddModuleScore(SP.QCFiltered.data,
genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
# NOTE(review): length(genes.list) is 1 here (a list holding one vector),
# not the number of genes -- confirm that ctrl.size = 1 is intended.
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
# K-means clustering (k = 3) based on EN and LN signature scores
cl <- kmeans(cbind(SP.QCFiltered.data@meta.data$EN_signature1,
                   SP.QCFiltered.data@meta.data$LN_signature1), 3)
SP.QCFiltered.data@meta.data$kmeanClust <- paste0("Clust.", cl$cluster)

# Find the k-means cluster with the highest mean LN_signature1 score
MeanKclust.LNscore <- aggregate(LN_signature1 ~ kmeanClust, SP.QCFiltered.data@meta.data, mean)
LN.clust <- MeanKclust.LNscore %>%
  filter(LN_signature1 == max(LN_signature1)) %>%
  pull(kmeanClust)

# Extract barcodes of the cells in that cluster
LN.cells <- SP.QCFiltered.data@meta.data %>%
  filter(kmeanClust == LN.clust) %>%
  pull(Barcodes)

col.pal <- wes_palette("GrandBudapest1", 3, type = "discrete")

# Finish the scatter plot before handing it to ggMarginal(); chaining
# ggMarginal(p1, ...) with `+` would reference p1 inside its own definition.
p1 <- ggplot(SP.QCFiltered.data@meta.data,
             aes(x = LN_signature1, y = EN_signature1, colour = kmeanClust)) +
  annotate(geom = "text", label = paste0(length(LN.cells), " Late Neurons"), x = 1, y = 1.5) +
  scale_color_manual(values = col.pal) +
  geom_point()

ggMarginal(p1, type = "histogram", fill = "lightgrey")
rm(p1)

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = SP.QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = SP.QCFiltered.data,
        group.by = "kmeanClust",
        reduction.use = "spring",
        cols.use = col.pal,
        dim.1 = 1,
        dim.2 = 2,
        label.size = 4,
        no.legend = TRUE)
# Filter the seurat object: keep only the late-neuron cells selected above
SP.QCFiltered.data <- SubsetData(SP.QCFiltered.data,
                                 cells.use = LN.cells,
                                 subset.raw = TRUE,
                                 do.clean = FALSE)

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = SP.QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = SP.QCFiltered.data,
        group.by = "kmeanClust",
        reduction.use = "spring",
        cols.use = "#969696",
        dim.1 = 1,
        dim.2 = 2,
        label.size = 4,
        no.legend = TRUE)
We perform the clustering procedure as for the glutamatergic neurons
We decide to exclude Cell cycle, ribosomal and mitochondrial associated genes, as well as Xist for the clustering step.
# Exclude cell cycle associated genes.
# These vectors are matched exactly (via %in%) against gene names further down,
# so every symbol must be free of padding and use the capitalised mouse
# convention. Fixed relative to the previous lists:
#   - stray leading spaces removed (" Casp8ap2", " Cenpk")
#   - capitalisation fixed ("kif20b" -> "Kif20b", "kif2c" -> "Kif2c")
#   - misspelt symbols fixed ("Tym5" -> "Tyms", "Rap2" -> "Rpa2", "Ddk1" -> "Cdk1")
# NOTE(review): Seurat's reference G2/M list has CENPF where this list has
# "Cenpk"; kept as written -- confirm which gene was intended.
s.genes <- c("Mcm5", "Pcna", "Tyms", "Fen1", "Mcm2", "Mcm4", "Rrm1", "Ung",
             "Gins2", "Mcm6", "Cdca7", "Dtl", "Prim1", "Uhrf1", "Mlf1ip",
             "Hells", "Rfc2", "Rpa2", "Nasp", "Rad51ap1", "Gmnn", "Wdr76",
             "Slbp", "Ccne2", "Ubr7", "Pold3", "Msh2", "Atad2", "Rad51",
             "Rrm2", "Cdc45", "Cdc6", "Exo1", "Tipin", "Dscc1", "Blm",
             "Casp8ap2", "Usp1", "Clspn", "Pola1", "Chaf1b", "Brip1", "E2f8")
g2m.genes <- c("Hmgb2", "Cdk1", "Nusap1", "Ube2c", "Birc5", "Tpx2", "Top2a",
               "Ndc80", "Cks2", "Nuf2", "Cks1b", "Mki67", "Tmpo", "Cenpk",
               "Tacc3", "Fam64a", "Smc4", "Ccnb2", "Ckap2l", "Ckap2", "Aurkb",
               "Bub1", "Kif11", "Anp32e", "Tubb4b", "Gtse1", "Kif20b", "Hjurp",
               "Cdca3", "Hn1", "Cdc20", "Ttk", "Cdc25c", "Kif2c", "Rangap1",
               "Ncapd2", "Dlgap5", "Cdca2", "Cdca8", "Ect2", "Kif23", "Hmmr",
               "Aurka", "Psrc1", "Anln", "Lbr", "Ckap5", "Cenpe", "Ctcf",
               "Nek2", "G2e3", "Gas2l3", "Cbx5", "Cenpa")
# Exclude genes detected in less than 3 cells
num.cells <- Matrix::rowSums(SP.QCFiltered.data@data > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 3)])

# Drop ribosomal (Rpl/Rps/Mrp), mitochondrial (mt-), cell cycle genes and Xist
GenesToRemove <- c(grep(pattern = "(^Rpl|^Rps|^Mrp)", x = genes.use, value = TRUE),
                   grep(pattern = "^mt-", x = genes.use, value = TRUE),
                   s.genes, g2m.genes, "Xist")
genes.use <- genes.use[!genes.use %in% GenesToRemove]

# Raw counts restricted to the retained genes, then log2(CPM + 1)
dgeMatrix_count <- as.matrix(SP.QCFiltered.data@raw.data)[rownames(SP.QCFiltered.data@raw.data) %in% genes.use, ]
dgeMatrix_cpm <- cpm(dgeMatrix_count)
norm.dat <- log2(dgeMatrix_cpm + 1)

# Build Data.matrix from the matrices computed just above so the covariates
# below cannot silently use a stale object, and expose raw.dat by explicit
# assignment (no attach()) for the later iter_clust(counts = raw.dat) call.
Data.matrix <- list(raw.dat = dgeMatrix_count, norm.dat = norm.dat)
raw.dat <- Data.matrix$raw.dat

# Per-cell technical covariates passed to iter_clust() as `rm.eigen`
gene.counts <- log2(colSums(as.matrix(Data.matrix$norm.dat) > 0))
nUMI <- log2(colSums(Data.matrix$raw.dat))
# NOTE(review): assumes meta.data rows are in the same cell order as the
# columns of the count matrix -- confirm.
perctMito <- SP.QCFiltered.data@meta.data$percent.mito
perctRibo <- SP.QCFiltered.data@meta.data$percent.ribo

# NOTE(review): the arguments of cbind() and the column names were truncated
# in the source; reconstructed from the four covariates defined above (the
# same four that are removed below). The column names after "log2nGenes" are
# reconstructed -- confirm.
rm.eigen <- as.matrix(cbind(gene.counts,
                            nUMI,
                            perctMito,
                            perctRibo))
row.names(rm.eigen) <- names(gene.counts)
colnames(rm.eigen) <- c("log2nGenes",
                        "log2nUMI",
                        "perctMito",
                        "perctRibo")
rm(gene.counts, nUMI, perctMito, perctRibo)
# Parameters for iterative clustering
# Differential-expression criteria object; it is passed as `de.param` to both
# iter_clust() and merge_cl() below.
# NOTE(review): the exact meaning of each threshold is defined by
# scrattch.hicat::de_param -- see the package documentation before tuning.
de.param <- de_param(padj.th = 0.01,
lfc.th = 0.9,
low.th = 1,
q1.th = 0.25,
q2.th = NULL,
q.diff.th = 0.7,
de.score.th = 30,
min.cells = 10)
# Perform the iterative clustering
# Inputs: the log2(CPM + 1) matrix `norm.dat`, the matching raw counts
# `raw.dat`, the DE criteria `de.param` and the covariate matrix `rm.eigen`.
# NOTE(review): unlike the glutamatergic run, no `k.param` is passed here, so
# the function's default applies -- confirm this difference is intended.
iter.result <- iter_clust(norm.dat,
counts = raw.dat,
dim.method = "pca",
max.dim = 15,
de.param = de.param,
type = "undirectional",
rm.eigen = rm.eigen,
rm.th = 0.7,
vg.padj.th = 0.5,
method = "louvain",
prefix = "test-iter_clust",
verbose = F)
# Merge clusters which are not separable by DEGs
# Reduced-dimension input: cells in rows, the marker genes returned by
# iter_clust() in columns.
rd.dat <- t(norm.dat[iter.result$markers,])
merge.result <- merge_cl(norm.dat,
cl = iter.result$cl,
rd.dat = rd.dat,
de.param = de.param)
# Report the number of clusters left after merging
cat(length(unique(merge.result$cl))," Clusters\n")
# Recorded console output follows; the "DE genes" line comes from a statement
# that is not present in the visible code.
## 6 Clusters
## 164 DE genes
# Use the merged cluster assignment as the cell identity, and keep a
# character copy in meta.data for the identity transfer further down.
# NOTE(review): assumes merge.result$cl is in the same cell order as the
# Seurat object -- confirm.
SP.QCFiltered.data@ident <- as.factor(merge.result$cl)
SP.QCFiltered.data@meta.data$LN.ident <- as.character(SP.QCFiltered.data@ident)

# One colour per merged cluster
colors <- c("#c773a7", "#b79f0b", "#3ca73f", "#31b6bd", "#ec756d", "#7293c8")

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = SP.QCFiltered.data,` is reconstructed -- confirm.
DimPlot(object = SP.QCFiltered.data,
        reduction.use = "spring",
        dim.1 = 1,
        dim.2 = 2,
        label.size = 4,
        no.legend = TRUE,
        cols.use = colors)
# Load full dataset
Allcells.data <- readRDS("./Clustered.cells.RDS")

#' Transfer late-neuron cluster identities onto another Seurat object
#'
#' For every distinct value of `LN.ident` in `Clustdata@meta.data`, the cells
#' carrying that value (and also present in `RawQCdata`) get the identity
#' `paste0(prefix, value)` in `RawQCdata`. The size of each cluster is printed.
#'
#' @param Clustdata Seurat object whose meta.data holds an `LN.ident` column.
#' @param RawQCdata Seurat object receiving the identities.
#' @param prefix Prefix prepended to each cluster label (default "LN.GABA.").
#' @return `RawQCdata` with the updated identities.
Rename.Clust <- function(Clustdata, RawQCdata, prefix = "LN.GABA.") {
  for (i in unique(Clustdata@meta.data$LN.ident)) {
    Barcodes <- rownames(subset(Clustdata@meta.data, Clustdata@meta.data$LN.ident == i))
    print(paste0("Cluster_", i, ": ", length(Barcodes), " Cells"))

    # Only barcodes that exist in the target object can be relabelled
    Barcodes <- Barcodes[Barcodes %in% rownames(RawQCdata@meta.data)]
    RawQCdata <- SetIdent(RawQCdata, cells.use = Barcodes, ident.use = paste0(prefix, i))
  }

  # The caller assigns the result, so the updated object must be returned
  RawQCdata
}

Allcells.data <- Rename.Clust(Clustdata = SP.QCFiltered.data, RawQCdata = Allcells.data)
## [1] "Cluster_10: 113 Cells"
## [1] "Cluster_1: 26 Cells"
## [1] "Cluster_7: 45 Cells"
## [1] "Cluster_11: 54 Cells"
## [1] "Cluster_9: 16 Cells"
## [1] "Cluster_8: 23 Cells"
# Palette for the full dataset: a grey first entry followed by the
# per-cluster colours.
colors2 <- c("#969696",
             "#ec756d", "#c773a7", "#7293c8", "#b79f0b", "#3ca73f", "#31b6bd",
             "#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b", "#cc8778", "#d14c8d",
             "#4cabdc", "#5ab793", "#e7823a", "#e6bb9b", "#046c9a", "#4784a2",
             "#4990c9")

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = Allcells.data,` is reconstructed -- confirm.
DimPlot(object = Allcells.data,
        reduction.use = "spring",
        dim.1 = 1,
        dim.2 = 2,
        label.size = 2,
        no.legend = TRUE,
        cols.use = colors2)
Manuscript Fig. 2A
# Extract late neurons clusters
glut.clusters <- paste0(c("LN.Glut."), c("13", "1", "14", "22", "16", "19", "24", "26", "20", "21"))
gaba.clusters <- paste0(c("LN.GABA."), c("1", "7", "8", "9", "10", "11"))
clusters <- c(gaba.clusters, glut.clusters)

All.LN.data <- SubsetData(Allcells.data, ident.use = clusters, subset.raw = TRUE, do.clean = FALSE)

# Clean up the helper vectors. The previous call named `gaba.cluster`, which
# does not exist, so `gaba.clusters` was left behind with a warning.
rm(glut.clusters, gaba.clusters, clusters)

# One colour per retained cluster
colors <- c("#ec756d", "#c773a7", "#7293c8", "#b79f0b", "#3ca73f", "#31b6bd",
            "#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b", "#d14c8d", "#4cabdc",
            "#5ab793", "#e7823a", "#046c9a", "#4990c9")

# NOTE(review): the opening line of this call was missing from the source;
# `DimPlot(object = All.LN.data,` is reconstructed -- confirm.
DimPlot(object = All.LN.data,
        reduction.use = "spring",
        dim.1 = 1,
        dim.2 = 2,
        label.size = 2,
        no.legend = TRUE,
        cols.use = colors)
# Cell-type label -> late-neuron cluster identities pooled under that label.
# Entries are applied in the order written: sub-pallial types first, then
# pallial types.
ident.map <- list(
  # Sub-pallial neurons
  dLGE        = paste0("LN.GABA.", c(8, 11)),
  Striatal_IN = paste0("LN.GABA.", c(7, 9, 10)),
  Cortical_IN = paste0("LN.GABA.", c(1)),
  # Pallial neurons
  CR          = paste0("LN.Glut.", c(1, 13, 14)),
  Fezf1       = paste0("LN.Glut.", c(22)),
  Pcp4        = paste0("LN.Glut.", c(16, 19)),
  Nr4a2       = paste0("LN.Glut.", c(24)),
  Foxp2       = paste0("LN.Glut.", c(26, 20, 21))
)

for (new.ident in names(ident.map)) {
  All.LN.data <- SetIdent(All.LN.data,
                          cells.use = WhichCells(All.LN.data, ident = ident.map[[new.ident]]),
                          ident.use = new.ident)
}
rm(ident.map, new.ident)

# One colour per cell-type label
colors <- c("#ea6569", "#7694c8", "#e6a0c4", "#e2d203", "#096b9a", "#49adc7", "#e58606", "#b31021")
# Prepare annotation for hicat pipeline.
# Colours are paired with identities by order of first appearance in
# All.LN.data@ident, so the two vectors must have the same length.
ident.names <- unique(as.character(All.LN.data@ident))
stopifnot(length(colors) == length(ident.names))
colorsident <- cbind(ident = ident.names,
                     colors = colors,
                     id = ident.names)
rm(ident.names)

# Create annotation data.frame: one row per cell with its cluster id, label
# and colour.
anno.df <- as.data.frame(cbind(
  sample_name = row.names(All.LN.data@meta.data),
  primary_type_id = colorsident[match(as.character(All.LN.data@ident), colorsident[, 1]), 3],
  primary_type_label = as.character(All.LN.data@ident),
  primary_type_color = colorsident[match(as.character(All.LN.data@ident), colorsident[, 1]), 2]
))

# Make a data.frame of unique cluster id, type and color.
# NOTE(review): the end of this pipe was truncated in the source (dangling
# `%>%`); `unique()` is restored to get one row per cluster -- confirm.
cl.df <- anno.df %>%
  select(primary_type_id, primary_type_label, primary_type_color) %>%
  unique()
colnames(cl.df)[1:3] <- c("cluster_id", "cluster_label", "cluster_color")

# Sort by cluster_id
cl.df <- arrange(cl.df, cluster_id)
row.names(cl.df) <- cl.df$cluster_id

# Cluster membership factor, named by cell barcode
cl.fact <- setNames(factor(anno.df$primary_type_id), anno.df$sample_name)
# Filter genes: keep genes detected in at least 10 cells, then drop
# ribosomal (Rpl/Rps/Mrp), mitochondrial (mt-) genes and Xist
num.cells <- Matrix::rowSums(All.LN.data@data > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 10)])
GenesToRemove <- c(grep(pattern = "(^Rpl|^Rps|^Mrp)", x = genes.use, value = TRUE),
                   grep(pattern = "^mt-", x = genes.use, value = TRUE),
                   "Xist")
genes.use <- genes.use[!genes.use %in% GenesToRemove]
rm(GenesToRemove, num.cells)

# Restrict the raw counts to the retained genes and re-normalise
All.LN.data@raw.data <- All.LN.data@raw.data[genes.use, ]
All.LN.data <- NormalizeData(object = All.LN.data,
                             normalization.method = "LogNormalize",
                             scale.factor = round(median(All.LN.data@meta.data$nUMI)),
                             display.progress = FALSE)

# Find all var genes
All.LN.data <- FindVariableGenes(object = All.LN.data,
                                 mean.function = ExpMean,
                                 dispersion.function = LogVMR,
                                 x.low.cutoff = 0.02,
                                 x.high.cutoff = 3,
                                 y.cutoff = 1,
                                 do.plot = FALSE,
                                 display.progress = FALSE)

# Raw counts of the variable genes, then log2(CPM + 1)
dgeMatrix_count <- as.matrix(All.LN.data@raw.data)[rownames(All.LN.data@raw.data) %in% All.LN.data@var.genes, ]
dgeMatrix_cpm <- cpm(dgeMatrix_count)
norm.dat <- log2(dgeMatrix_cpm + 1)
rm(dgeMatrix_cpm)

# Bundle both matrices. `raw.dat` is exposed by explicit assignment rather
# than attach(): attach() stacks a new search-path entry on every run and is
# masked by any global of the same name. `norm.dat` already exists globally.
Data.matrix <- list(raw.dat = dgeMatrix_count, norm.dat = norm.dat)
raw.dat <- Data.matrix$raw.dat
scrattch.hicat performs hierarchical clustering on a cluster correlation matrix based on median expression values for the top 50 most DEGs between every pair of clusters. It estimates branch confidence levels using a bootstrap approach implemented in the pvclust package.
# Build the dendrogram
# NOTE(review): `cl.med` is not defined in the visible code; presumably the
# per-cluster median expression matrix with one column per level of cl.fact
# -- confirm it is computed before this call.
dend.result <- build_dend(cl.med[,levels(cl.fact)],
# Leaf colours: cluster colours named by cluster id (row names of cl.df)
l.color= setNames(as.character(cl.df$cluster_color), row.names(cl.df)),
nboot = 100)
dend <- dend.result$dend
# Copy of the dendrogram whose leaf ids are replaced by the cluster labels
dend.labeled <- dend.result$dend
labels(dend.labeled) <- cl.df[labels(dend.labeled), "cluster_label"]
# Import dendrogram into Seurat object
All.LN.data@cluster.tree[[1]] <- ape::as.phylo(dend.labeled)
# NOTE(review): the tree was built from dend.labeled, whose labels are already
# cluster labels; as.numeric() on such labels would yield NA -- verify that
# this relabelling does what is intended.
labels(All.LN.data@cluster.tree[[1]]) <- cl.df[as.numeric(labels(All.LN.data@cluster.tree[[1]])), "cluster_label"]
# Fix the display order of the cell types
All.LN.data@ident <- factor(All.LN.data@ident, levels = c("Cortical_IN","Striatal_IN","dLGE","CR","Fezf1","Pcp4","Nr4a2","Foxp2"))
# Left panel: rotated dendrogram without labels, y axis reversed
p1 <- ggdendrogram(dendro_data(as.hclust(dend.labeled)), labels = F, rotate = T) + scale_y_reverse()
# Right panel: marker-gene dot plot; the gene vector is reversed with rev()
# NOTE(review): Cluster.dotplot is not defined in the visible code;
# `percent.mi` presumably relies on partial matching of an argument such as
# `percent.min` -- confirm against the function definition.
p2 <- Cluster.dotplot(All.LN.data,
Marker.genes = rev(c("Foxg1","Tbr1", "Slc17a6",
"Reln", "Lhx5", "Neurod6",
"Pcp4", "Fezf1", "Pbx3",
"Foxp2", "Nr4a2", "Dlx5", "Gad2",
"Six3", "Sp8", "Zfp503", "Isl1", "Lhx6", "Sst")),
min.expression = 0.7, percent.mi=0.15, maxdot.size = 5)
# Combine both panels side by side, aligned horizontally
plot_grid(plotlist = list(p1,p2), ncol=2, align='h', rel_widths = c(0.2, 1.5))
Manuscript Fig. 2B
