1 Clustering - exercise

Imagine ETV6-RUNX1_4 failed leaving you with three ETV6-RUNX1 replicates, … but the corrected values remain the same somehow.

Define Louvain and Leiden clusters and compare the outcome. Maybe check clustering quality using per-cluster modularity, too.

1.1 load data

splSetToGet <- "PBMMC,ETV6-RUNX1"
splSetVec <- unlist(strsplit(splSetToGet, ","))

nbPcToComp <- 50
figSize <- 7
library(tidyverse) # data wrangling and plotting (ggplot2)
library(scater) # scRnaSeq QC
library(scran) # scRnaSeq normalisation
library(bluster) # scRnaSeq clustering
library(dynamicTreeCut) # tree cutting in clustering
library(cluster) # for silhouette
library(igraph) # for graph-based clustering and plotting networks 
library(reticulate) # for graph-based clustering with leiden
library(leiden) # for community detection (clustering)
library(pheatmap) # for heatmap plotting
library(patchwork) # to combine plots
fontsize <- theme(axis.text=element_text(size=12), axis.title=element_text(size=16))
sce <- readRDS("../Robjects/DataIntegration_mnn.out.rds") # auto.merge

# remove ETV6-RUNX1_4
bcToKeep <- colData(sce) %>%
  data.frame() %>%
  rownames_to_column("Barcode2") %>%
  filter(!SampleName == "ETV6-RUNX1_4") %>%
  pull(Barcode2)
indToKeep <- which(colnames(sce) %in% bcToKeep)
sce <- sce[,indToKeep]

sce$SampleName <- factor(sce$SampleName)
sce <- runTSNE(sce, dimred = "corrected")

Build graph:

# think 'buildSNNGraph'
snn.gr <- buildSNNGraph(sce, use.dimred="corrected")

1.2 Louvain

Compute Louvain clusters:

ig.louvain <- igraph::cluster_louvain(snn.gr)
cl <- ig.louvain$membership
cl <- factor(cl)
# store membership
sce$louvain <- cl
# show cluster sizes:
table(sce$louvain)
## 
##   1   2   3   4   5   6   7   8   9  10  11 
## 310 422 265 501 175 587 161 156 168 105 150
fontsize <- theme(axis.text=element_text(size=12), axis.title=element_text(size=16))
# show clusters on TSNE
p <- plotTSNE(sce, colour_by="louvain") + fontsize
p

Per-cluster modularity

# compute
mod.out <- bluster::pairwiseModularity(snn.gr,
                                       sce$louvain,
                                       get.weights=TRUE)
# Modularity is proportional to the cluster size,
# so we compute the ratio of the observed to expected weights
# for each cluster pair
ratio <- mod.out$observed/mod.out$expected
lratio <- log10(ratio + 1) # on log scale to improve colour range

# plot
pheatmap(lratio, cluster_rows=FALSE, cluster_cols=FALSE, 
    color=colorRampPalette(c("white", "blue"))(100))

1.3 Leiden

Compute Leiden clusters:

library("leiden")
adjacency_matrix <- igraph::as_adjacency_matrix(snn.gr)
partition <- leiden(adjacency_matrix)
# store membership
sce$leiden <- factor(partition)
# show cluster sizes:
table(sce$leiden)
## 
##   1   2   3   4   5   6   7   8   9  10  11 
## 590 476 433 340 257 173 166 161 155 151  98
p <- plotTSNE(sce, colour_by="leiden") + fontsize
p

1.4 Compare louvain and leiden clusters

tmpTab <- table(sce$louvain, sce$leiden)
tmpTab
##     
##        1   2   3   4   5   6   7   8   9  10  11
##   1    0   0   0 310   0   0   0   0   0   0   0
##   2    0   6 415   0   0   0   0   0   0   1   0
##   3    0   1   0  30 234   0   0   0   0   0   0
##   4    0 469  17   0  15   0   0   0   0   0   0
##   5    2   0   0   0   0 172   0   0   0   1   0
##   6  587   0   0   0   0   0   0   0   0   0   0
##   7    0   0   0   0   0   0   0 161   0   0   0
##   8    0   0   0   0   1   0   0   0 155   0   0
##   9    0   0   1   0   0   1 166   0   0   0   0
##   10   0   0   0   0   7   0   0   0   0   0  98
##   11   1   0   0   0   0   0   0   0   0 149   0
rownames(tmpTab) = paste("louvain", rownames(tmpTab), sep = "_")
colnames(tmpTab) = paste("leiden", colnames(tmpTab) , sep = "_")
pheatmap(tmpTab)