Introduction

In the course materials we focused on differential expression analysis of cluster 8. For this exercise we would like you to perform similar analysis for cluster 6.

Load packages

library(ggplot2)
library(scater)
library(scran)
library(dplyr)
library(RColorBrewer)
library(pheatmap)
library(glue)

Load the data

# load both sce objects
uncorrected <- readRDS("~/Course_Materials/scRNAseq/Robjects/DataIntegration_uncorrected.rds")
corrected <- readRDS("~/Course_Materials/scRNAseq/Robjects/caron_postDeconv_5hCellPerSpl_dsi_PBMMC_ETV6-RUNX1_clust.Rds")

1. Perform differential expression analysis.

Extract louvain clusters from the corrected SCE object, and use the uncorrected SCE objects with findMarkers command. You can select any of the testing method and options.

clusters.mnn <- factor(paste0("c",corrected$louvain))

markers.out <- findMarkers(uncorrected, 
                           groups=clusters.mnn, 
                           direction="up", 
                           block=uncorrected$SampleGroup,
                           row.data=rowData(uncorrected))

2. Obtain markers for cluster 11

c11_markers <- markers.out[["c11"]]
head(c11_markers)
## DataFrame with 6 rows and 31 columns
##                              ID      Symbol            Type  Chromosome
##                     <character> <character>     <character> <character>
## ENSG00000019582 ENSG00000019582        CD74 Gene Expression           5
## ENSG00000122026 ENSG00000122026       RPL21 Gene Expression          13
## ENSG00000169442 ENSG00000169442        CD52 Gene Expression           1
## ENSG00000177954 ENSG00000177954       RPS27 Gene Expression           1
## ENSG00000204287 ENSG00000204287     HLA-DRA Gene Expression           6
## ENSG00000211899 ENSG00000211899        IGHM Gene Expression          14
##                      mean  detected      mean  detected gene_sparsity      mean
##                 <numeric> <numeric> <numeric> <numeric>     <numeric> <numeric>
## ENSG00000019582  14.28182   71.8364  14.28182   71.8364    0.28163636   3.09199
## ENSG00000122026  22.07891   98.8364  22.07891   98.8364    0.01163636   4.08507
## ENSG00000169442   3.53218   74.4000   3.53218   74.4000    0.25600000   1.86734
## ENSG00000177954  36.32727   99.3273  36.32727   99.3273    0.00672727   4.83196
## ENSG00000204287   9.91218   65.2727   9.91218   65.2727    0.34727273   2.52422
## ENSG00000211899   4.47745   54.4000   4.47745   54.4000    0.45600000   1.21521
##                     total      tech        bio      p.value          FDR
##                 <numeric> <numeric>  <numeric>    <numeric>    <numeric>
## ENSG00000019582  2.161608  1.089750 1.07185856 1.48662e-107 4.40595e-105
## ENSG00000122026  1.088986  1.015799 0.07318673  4.39194e-02  1.53013e-01
## ENSG00000169442  1.473812  1.123537 0.35027496  7.12447e-08  8.86049e-07
## ENSG00000177954  0.962502  0.956980 0.00552162  5.74505e-01  9.78169e-01
## ENSG00000204287  1.761641  1.064799 0.69684173  1.81269e-33  1.05305e-31
## ENSG00000211899  1.832932  0.986502 0.84643050  6.51778e-61  9.11760e-59
##                                                                                                    per.block
##                                                                                                  <DataFrame>
## ENSG00000019582 5.08322:0.414473:0.408656:...:4.44901:0.818178:0.488043:...:3.07457:4.00954:1.006867:...:...
## ENSG00000122026 4.64287:0.416728:0.411199:...:3.11800:0.818574:0.718604:...:4.02121:1.28916:1.008164:...:...
## ENSG00000169442 2.33085:0.856185:0.689364:...:2.84416:0.802450:0.755836:...:1.78538:1.37777:1.038834:...:...
## ENSG00000177954 5.41778:0.455774:0.405496:...:4.10025:0.532832:0.556903:...:4.84162:1.04633:0.999843:...:...
## ENSG00000204287 5.06279:0.439068:0.408897:...:3.91515:0.841719:0.593752:...:1.84541:2.71984:1.043405:...:...
## ENSG00000211899 1.12433:0.836950:0.692556:...:1.87587:1.067592:0.849177:...:1.05851:2.26344:0.846499:...:...
##                 chosenHvg       Top      p.value          FDR summary.logFC
##                 <logical> <integer>    <numeric>    <numeric>     <numeric>
## ENSG00000019582      TRUE         1 2.67921e-180 4.74221e-176       3.99632
## ENSG00000122026      TRUE         1  1.07776e-71  2.11960e-68       1.24872
## ENSG00000169442      TRUE         1  5.65607e-89  1.66854e-85       2.54561
## ENSG00000177954      TRUE         1 2.50244e-115 1.47644e-111       1.75667
## ENSG00000204287      TRUE         1 3.97745e-129 3.52005e-125       3.22842
## ENSG00000211899      TRUE         1  1.01675e-88  2.57092e-85       3.43987
##                  logFC.c1 logFC.c10  logFC.c2  logFC.c3  logFC.c4  logFC.c5
##                 <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000019582  1.752459  3.417885  0.897500  1.048645   1.85648  1.201029
## ENSG00000122026  0.577540  0.614424  1.482249  1.192145   1.24872  0.973351
## ENSG00000169442  1.630066  0.814771  1.116442  0.999239   1.62041  1.958925
## ENSG00000177954  0.921814  0.669844  1.455826  1.339475   1.75667  0.931267
## ENSG00000204287  0.923101  2.946220  0.527444  0.170749   1.02352  0.758745
## ENSG00000211899  3.157173  3.439874  2.622119  3.116328   2.26248  0.113740
##                  logFC.c6  logFC.c7  logFC.c8  logFC.c9
##                 <numeric> <numeric> <numeric> <numeric>
## ENSG00000019582  3.996317  4.123194  1.740771  3.278144
## ENSG00000122026 -0.867268  0.782275  0.495846  0.448842
## ENSG00000169442  0.666399  2.545606  1.942246  1.971381
## ENSG00000177954 -0.459606  1.769757  1.140086  1.676943
## ENSG00000204287  3.228422  3.102547  1.059817  2.561409
## ENSG00000211899  3.504918  3.379915  3.139960  3.037744

2a. Visualize one of the marker genes using violin plot

Here you need to use uncorrected SCE object (and Ensembl gene IDs)

plotExpression(uncorrected,
               x=I(factor(corrected$louvain)),
               features="ENSG00000019582", # "CD74",
               colour_by="SampleGroup") +
  facet_wrap(~colour_by) 

2b. Visualize one of the markers on tSNE plot

Here you can use corrected SCE object (use gene symbols).

plotTSNE(corrected, colour_by="CD74", by_exprs_values = "reconstructed")

3. Take top 5 genes from each pairwise comparison and create a heatmap (hint: use the Top field)

c11_top5 <- c11_markers[c11_markers$Top <= 5,]
c11_top5_logFC <- getMarkerEffects(c11_top5)

# change rownames from Ensembl gene IDs to gene symbols 
rownames(c11_top5_logFC) <- rowData(uncorrected)[rownames(c11_top5_logFC), "Symbol"]

pheatmap(c11_top5_logFC, breaks=seq(-5, 5, length.out=101))