1 Differential expression and abundance between conditions

# Sample groups to compare, supplied as a single comma-separated string.
splSetToGet <- "PBMMC,ETV6-RUNX1"
# The same groups as a character vector, one element per group.
splSetVec <- strsplit(splSetToGet, split = ",", fixed = TRUE)[[1]]
# Underscore-joined form of the group names (commas replaced by "_").
splSetToGet2 <- paste(splSetVec, collapse = "_")
# NOTE(review): presumably the number of principal components to compute;
# not used in the visible part of the file -- confirm against later code.
nbPcToComp <- 50
# NOTE(review): presumably a figure dimension; units are not shown here.
figSize <- 7
library(scater)
library(scran)
library(batchelor)
library(edgeR)
library(tidyverse)
library(patchwork)
library(DT)
# Reusable ggplot2 theme tweak: axis tick labels at size 12, axis titles at 16.
fontsize <- theme(
  axis.text = element_text(size = 12),
  axis.title = element_text(size = 16)
)

Source: the ‘Multi-sample comparisons’ chapter of the OSCA book (Orchestrating Single-Cell Analysis with Bioconductor).

1.1 Exercise 1 - differential expression

Identify label-specific DE genes that are significant in ‘c10’ yet not DE in any other label.

Plot the top-ranked gene for inspection.

1.2 Setting up the data

Load the SCE object (with 1200 cells per sample):

# Load the merged SCE object for the PBMMC and ETV6-RUNX1 samples.
merged <- readRDS(
  "../Robjects/caron_sce_nz_postDeconv_1p2kcps_dsi_PBMMC_ETV6-RUNX1_merged.Rds"
)
# The raw counts were written to a companion file (to help file sharing).
# Read them straight into the object's counts slot, so no temporary copy is
# left behind in the workspace and no clean-up step is needed.
counts(merged) <- readRDS(
  "../Robjects/caron_sce_nz_postDeconv_1p2kcps_dsi_PBMMC_ETV6-RUNX1_merged_counts.Rds"
)

A brief inspection of the cluster-by-sample table below shows that clusters contain varying contributions from the samples:

# Use the cluster assignments stored in the `clusters.mnn` column as the
# cell labels of the object.
colLabels(merged) <- merged$clusters.mnn
# Cross-tabulate cell labels (rows) against sample names (columns) to see how
# many cells each sample contributes to each cluster.
tab <- table(colLabels(merged), merged$SampleName)
# Print the contingency table.
tab
##      
##       ETV6-RUNX1_1 ETV6-RUNX1_2 ETV6-RUNX1_3 ETV6-RUNX1_4 PBMMC_1 PBMMC_3
##   c1           159            6            1            6      82      26
##   c2           310           94           67          140       7       2
##   c3           275          465          191          275      17       9
##   c4            35          324          213           81       6       1
##   c5             0            4            1            2      10      16
##   c6             3           13          108           25      55     112
##   c7           375          189           73          329     214      47
##   c8             2            4           50          204       5     273
##   c9             0            0            2            2     254      51
##   c10           34           61           22           12     205      55
##   c11            1           19          107           26      32     106
##   c12            5           18          333           44     111     225
##   c13            0            2           27           40      34     149
##   c14            1            1            5           14     168     128
##      
##       PBMMC_4
##   c1       40
##   c2        9
##   c3       39
##   c4       30
##   c5       10
##   c6      108
##   c7      106
##   c8       20
##   c9        3
##   c10     202
##   c11     116
##   c12     253
##   c13      41
##   c14     223

On the t-SNE plots below, cells are coloured by sample group (‘SampleGroup’) or by sample (‘batch of origin’). Cluster labels are superimposed at the median coordinate of the cells assigned to each cluster.

# t-SNE coloured by sample group, with each cluster label drawn at the
# median coordinate of the cells assigned to that cluster.
p1 <- plotTSNE(merged, colour_by = "SampleGroup", text_by = "label",
               point_size = 0.3)
# t-SNE coloured by sample, one panel per sample. `other_fields` copies
# SampleName into the plot's own data so the facet variable travels with each
# point, instead of being looked up in `merged` and matched to the plotted
# points purely by row position.
p2 <- plotTSNE(merged, colour_by = "SampleName", other_fields = "SampleName",
               point_size = 0.3) +
  facet_wrap(~SampleName)
# Display the sample-group plot.
p1