Imagine ETV6-RUNX1_4 failed leaving you with three ETV6-RUNX1 replicates, … but the corrected values remain the same somehow.
Define Louvain and Leiden clusters and compare the outcome. Maybe check clustering quality using per-cluster modularity, too.
splSetToGet <- "PBMMC,ETV6-RUNX1"
splSetVec <- unlist(strsplit(splSetToGet, ","))
nbPcToComp <- 50
figSize <- 7
library(tidyverse) # data wrangling and plotting (ggplot2)
library(scater) # scRnaSeq QC
library(scran) # scRnaSeq normalisation
library(bluster) # scRnaSeq clustering
library(dynamicTreeCut) # tree cutting in clustering
library(cluster) # for silhouette
library(igraph) # for graph-based clustering and plotting networks
library(reticulate) # for graph-based clustering with leiden
library(leiden) # for community detection (clustering)
library(pheatmap) # for heatmap plotting
library(patchwork) # to combine plots
fontsize <- theme(axis.text=element_text(size=12), axis.title=element_text(size=16))
sce <- readRDS("../Robjects/DataIntegration_mnn.out.rds") # auto.merge
# remove ETV6-RUNX1_4
bcToKeep <- colData(sce) %>%
data.frame() %>%
rownames_to_column("Barcode2") %>%
filter(!SampleName == "ETV6-RUNX1_4") %>%
pull(Barcode2)
indToKeep <- which(colnames(sce) %in% bcToKeep)
sce <- sce[,indToKeep]
sce$SampleName <- factor(sce$SampleName)
sce <- runTSNE(sce, dimred = "corrected")
Build graph:
# think 'buildSNNGraph'
snn.gr <- buildSNNGraph(sce, use.dimred="corrected")
Compute Louvain clusters:
ig.louvain <- igraph::cluster_louvain(snn.gr)
cl <- ig.louvain$membership
cl <- factor(cl)
# store membership
sce$louvain <- cl
# show cluster sizes:
table(sce$louvain)
##
## 1 2 3 4 5 6 7 8 9 10 11
## 310 422 265 501 175 587 161 156 168 105 150
fontsize <- theme(axis.text=element_text(size=12), axis.title=element_text(size=16))
# show clusters on TSNE
p <- plotTSNE(sce, colour_by="louvain") + fontsize
p
Per-cluster modularity
# compute
mod.out <- bluster::pairwiseModularity(snn.gr,
sce$louvain,
get.weights=TRUE)
# Modularity is proportional to the cluster size,
# so we compute the ratio of the observed to expected weights
# for each cluster pair
ratio <- mod.out$observed/mod.out$expected
lratio <- log10(ratio + 1) # on log scale to improve colour range
# plot
pheatmap(lratio, cluster_rows=FALSE, cluster_cols=FALSE,
color=colorRampPalette(c("white", "blue"))(100))
Compute Leiden clusters:
library("leiden")
adjacency_matrix <- igraph::as_adjacency_matrix(snn.gr)
partition <- leiden(adjacency_matrix)
# store membership
sce$leiden <- factor(partition)
# show cluster sizes:
table(sce$leiden)
##
## 1 2 3 4 5 6 7 8 9 10 11
## 590 476 433 340 257 173 166 161 155 151 98
p <- plotTSNE(sce, colour_by="leiden") + fontsize
p
tmpTab <- table(sce$louvain, sce$leiden)
tmpTab
##
## 1 2 3 4 5 6 7 8 9 10 11
## 1 0 0 0 310 0 0 0 0 0 0 0
## 2 0 6 415 0 0 0 0 0 0 1 0
## 3 0 1 0 30 234 0 0 0 0 0 0
## 4 0 469 17 0 15 0 0 0 0 0 0
## 5 2 0 0 0 0 172 0 0 0 1 0
## 6 587 0 0 0 0 0 0 0 0 0 0
## 7 0 0 0 0 0 0 0 161 0 0 0
## 8 0 0 0 0 1 0 0 0 155 0 0
## 9 0 0 1 0 0 1 166 0 0 0 0
## 10 0 0 0 0 7 0 0 0 0 0 98
## 11 1 0 0 0 0 0 0 0 0 149 0
rownames(tmpTab) = paste("louvain", rownames(tmpTab), sep = "_")
colnames(tmpTab) = paste("leiden", colnames(tmpTab) , sep = "_")
pheatmap(tmpTab)