splSetToGet <- "PBMMC,ETV6-RUNX1"
splSetVec <- unlist(strsplit(splSetToGet, ","))
splSetToGet2 <- gsub(",", "_", splSetToGet)
nbPcToComp <- 50
figSize <- 7
library(scater)
library(scran)
library(batchelor)
library(edgeR)
library(tidyverse)
library(patchwork)
library(DT)
fontsize <- theme(axis.text=element_text(size=12), axis.title=element_text(size=16))
Source: Multi-sample comparisons of the OSCA book.
Imagine ETV6-RUNX1_4 failed, leaving you with three ETV6-RUNX1 replicates … but all else remains as above, including the clusters identified.
Identify clusters whose abundance differ between conditions.
Load the SCE object (with 1200 cells per sample):
# Read object in:
merged <- readRDS("../Robjects/caron_sce_nz_postDeconv_1p2kcps_dsi_PBMMC_ETV6-RUNX1_merged.Rds")
# also get raw counts that were written to a separate file
# (to help file sharing)
merged_counts <- readRDS("../Robjects/caron_sce_nz_postDeconv_1p2kcps_dsi_PBMMC_ETV6-RUNX1_merged_counts.Rds")
# put raw counts back:
counts(merged) <- merged_counts
# tidy:
rm(merged_counts)
bcToKeep <- colData(merged) %>%
data.frame() %>%
filter(!SampleName == "ETV6-RUNX1_4") %>%
pull(Barcode)
indToKeep <- which(colnames(merged) %in% bcToKeep)
merged <- merged[,indToKeep]
merged$SampleName <- factor(merged$SampleName)
A brief inspection of the results shows clusters contain varying contributions from samples:
colLabels(merged) <- merged$clusters.mnn
tab <- table(colLabels(merged), merged$SampleName)
tab
##
## ETV6-RUNX1_1 ETV6-RUNX1_2 ETV6-RUNX1_3 PBMMC_1 PBMMC_3 PBMMC_4
## c1 159 6 1 82 26 40
## c2 310 94 67 7 2 9
## c3 275 465 191 17 9 39
## c4 35 324 213 6 1 30
## c5 0 4 1 10 16 10
## c6 3 13 108 55 112 108
## c7 375 189 73 214 47 106
## c8 2 4 50 5 273 20
## c9 0 0 2 254 51 3
## c10 34 61 22 205 55 202
## c11 1 19 107 32 106 116
## c12 5 18 333 111 225 253
## c13 0 2 27 34 149 41
## c14 1 1 5 168 128 223
Count cells assigned to each cluster:
abundances <- table(merged$clusters.mnn, merged$SampleName)
abundances <- unclass(abundances)
head(abundances)
##
## ETV6-RUNX1_1 ETV6-RUNX1_2 ETV6-RUNX1_3 PBMMC_1 PBMMC_3 PBMMC_4
## c1 159 6 1 82 26 40
## c2 310 94 67 7 2 9
## c3 275 465 191 17 9 39
## c4 35 324 213 6 1 30
## c5 0 4 1 10 16 10
## c6 3 13 108 55 112 108
Prepare edgeR object
# think 'DGEList'
extra.info <- colData(merged)[match(colnames(abundances), merged$SampleName),]
y.ab <- DGEList(abundances, samples=extra.info)
y.ab
## An object of class "DGEList"
## $counts
##
## ETV6-RUNX1_1 ETV6-RUNX1_2 ETV6-RUNX1_3 PBMMC_1 PBMMC_3 PBMMC_4
## c1 159 6 1 82 26 40
## c2 310 94 67 7 2 9
## c3 275 465 191 17 9 39
## c4 35 324 213 6 1 30
## c5 0 4 1 10 16 10
## 9 more rows ...
##
## $samples
## group lib.size norm.factors batch
## ETV6-RUNX1_1 1 1200 1 ETV6-RUNX1_1
## ETV6-RUNX1_2 1 1200 1 ETV6-RUNX1_2
## ETV6-RUNX1_3 1 1200 1 ETV6-RUNX1_3
## PBMMC_1 1 1200 1 PBMMC_1
## PBMMC_3 1 1200 1 PBMMC_3
## PBMMC_4 1 1200 1 PBMMC_4
## Sample
## ETV6-RUNX1_1 /ssd/personal/baller01/20200511_FernandesM_ME_crukBiSs2020/Data/CaronBourque2020/grch38300/SRR9264343/SRR9264343/outs/filtered_feature_bc_matrix/
## ETV6-RUNX1_2 /ssd/personal/baller01/20200511_FernandesM_ME_crukBiSs2020/Data/CaronBourque2020/grch38300/SRR9264344/SRR9264344/outs/filtered_feature_bc_matrix/
## ETV6-RUNX1_3 /ssd/personal/baller01/20200511_FernandesM_ME_crukBiSs2020/Data/CaronBourque2020/grch38300/SRR9264345/SRR9264345/outs/filtered_feature_bc_matrix/
## PBMMC_1 /ssd/personal/baller01/20200511_FernandesM_ME_crukBiSs2020/Data/CaronBourque2020/grch38300/SRR9264352/SRR9264352/outs/filtered_feature_bc_matrix/
## PBMMC_3 /ssd/personal/baller01/20200511_FernandesM_ME_crukBiSs2020/Data/CaronBourque2020/grch38300/SRR9264353/SRR9264353/outs/filtered_feature_bc_matrix/
## PBMMC_4 /ssd/personal/baller01/20200511_FernandesM_ME_crukBiSs2020/Data/CaronBourque2020/grch38300/SRR9264354/SRR9264354/outs/filtered_feature_bc_matrix/
## Barcode Run Sample.Name source_name sum
## ETV6-RUNX1_1 GGACGTCCATCGACGC-1 SRR9264343 GSM3872434 ETV6-RUNX1 2887
## ETV6-RUNX1_2 GTACGTAAGTCAATAG-2 SRR9264344 GSM3872435 ETV6-RUNX1 4759
## ETV6-RUNX1_3 CCTCAGTAGAGCTGCA-3 SRR9264345 GSM3872436 ETV6-RUNX1 2357
## PBMMC_1 CCGTACTGTAGGCATG-10 SRR9264352 GSM3872442 PBMMC 4156
## PBMMC_3 CAGGTGCAGCGCCTTG-11 SRR9264353 GSM3872443 PBMMC 2744
## PBMMC_4 GTTTCTAGTGAGGGTT-12 SRR9264354 GSM3872444 PBMMC 3893
## detected subsets_Mito_sum subsets_Mito_detected
## ETV6-RUNX1_1 1266 196 12
## ETV6-RUNX1_2 2012 118 11
## ETV6-RUNX1_3 853 74 9
## PBMMC_1 816 33 8
## PBMMC_3 893 99 11
## PBMMC_4 1502 241 10
## subsets_Mito_percent total block setName discard outlier
## ETV6-RUNX1_1 6.7890544 2887 ETV6-RUNX1 Caron FALSE FALSE
## ETV6-RUNX1_2 2.4795125 4759 ETV6-RUNX1 Caron FALSE FALSE
## ETV6-RUNX1_3 3.1395842 2357 ETV6-RUNX1 Caron FALSE FALSE
## PBMMC_1 0.7940327 4156 PBMMC Caron FALSE FALSE
## PBMMC_3 3.6078717 2744 PBMMC Caron FALSE FALSE
## PBMMC_4 6.1905985 3893 PBMMC Caron FALSE FALSE
## cell_sparsity sizeFactor Sample.Name2 label SampleName
## ETV6-RUNX1_1 0.9413319 0.9317278 ETV6-RUNX1_1 c3 ETV6-RUNX1_1
## ETV6-RUNX1_2 0.9067612 1.7736537 ETV6-RUNX1_2 c2 ETV6-RUNX1_2
## ETV6-RUNX1_3 0.9604708 0.5814998 ETV6-RUNX1_3 c12 ETV6-RUNX1_3
## PBMMC_1 0.9621855 0.5579368 PBMMC_1 c5 PBMMC_1
## PBMMC_3 0.9586172 0.5643228 PBMMC_3 c12 PBMMC_3
## PBMMC_4 0.9303953 1.1336564 PBMMC_4 c14 PBMMC_4
## SampleGroup clusters.mnn
## ETV6-RUNX1_1 ETV6-RUNX1 c3
## ETV6-RUNX1_2 ETV6-RUNX1 c2
## ETV6-RUNX1_3 ETV6-RUNX1 c12
## PBMMC_1 PBMMC c5
## PBMMC_3 PBMMC c12
## PBMMC_4 PBMMC c14
Filter out low-abundance labels
# think 'filterByExpr'
keep <- filterByExpr(y.ab, group=y.ab$samples$SampleGroup)
y.ab <- y.ab[keep,]
summary(keep)
## Mode FALSE TRUE
## logical 1 13
Make design matrix
design <- model.matrix(~factor(SampleGroup), y.ab$samples)
Estimate the NB dispersion
# think 'estimateDisp'
y.ab <- estimateDisp(y.ab, design, trend="none")
Estimate the QL dispersion
# think 'glmQLFit'
fit.ab <- glmQLFit(y.ab, design, robust=TRUE, abundance.trend=FALSE)
Test for differences in abundance between sample groups using glmQLFTest
res <- glmQLFTest(fit.ab, coef=ncol(design))
Get summary
# think 'decideTests'
summary(decideTests(res))
## factor(SampleGroup)PBMMC
## Down 1
## NotSig 11
## Up 1
Display whole table:
# think 'topTags'
topTags(res)
## Coefficient: factor(SampleGroup)PBMMC
## logFC logCPM F PValue FDR
## c14 6.1379897 16.18445 12.5003158 0.0008647146 0.01124129
## c2 -4.6810591 16.08165 8.4873772 0.0052604443 0.03419289
## c4 -3.9368152 16.39144 6.4877527 0.0138598745 0.05037579
## c3 -3.8325512 17.09027 6.2642705 0.0155002433 0.05037579
## c13 2.9332512 15.16283 3.8958438 0.0537296832 0.13969718
## c5 2.7586106 12.84091 3.1687027 0.0809052505 0.17529471
## c8 2.4039992 15.62869 2.7508652 0.1032226412 0.19169919
## c10 1.9779382 16.32000 1.9479700 0.1687371320 0.27419784
## c6 1.1467011 15.79598 0.6827729 0.4124088822 0.59570172
## c11 0.9978747 15.73137 0.5193625 0.4743411764 0.61664353