Data

library(DESeq2)
library(gplots)
library(RColorBrewer)
library(limma)
library(tidyverse)
# Read the sample information into R
sampleinfo <- read.delim("../data/SampleInfo.Corrected.txt", 
                         stringsAsFactors = F)
# Read the data into R
seqdata <- read.delim("../data/GSE60450_Lactation.featureCounts", 
                      stringsAsFactors = F,
                      comment = "#")
# Remove first two columns from seqdata
countdata <- as.data.frame(seqdata) %>% 
    column_to_rownames("Geneid") %>% # turn the geneid column into rownames
    rename_all(str_remove, ".bam") %>% # remove the ".bam" from the column names
    select(sampleinfo$Sample) %>% # keep sample columns using sampleinfo
    as.matrix()
# filter data
keep <- rowSums(countdata) > 5
countdata <- countdata[keep,]
# rlogcounts
rlogcounts <- rlog(countdata)
# We estimate the variance for each row in the logcounts matrix
countVar <- apply(rlogcounts, 1, var)
# DGE list
design <- as.formula(~ CellType)
# create the DESeqDataSet object
ddsObj <- DESeqDataSetFromMatrix(countData = countdata,
                              colData = sampleinfo,
                              design = design)
some variables in design formula are characters, converting to factors
ddsObj <- estimateSizeFactors(ddsObj)
# normalise dcounts
logcounts <- log2(countdata + 1)
normalizedCounts <- counts(ddsObj, normalized=TRUE) 
logNormalizedCounts <- log2(normalizedCounts + 1)

Challenge 1

  1. Use the DESeq2 function rlog to transform the count data. This function also normalises for library size.
  2. Plot the count distribution boxplots with this data How has this effected the count distributions?
rlogcounts <- rlog(countdata)
statusCol <- as.numeric(factor(sampleinfo$Status)) + 1
# Check distributions of samples using boxplots
boxplot(rlogcounts, 
        xlab="", 
        ylab="Log2(Counts)",
        las=2,
        col=statusCol)
# Let's add a blue horizontal line that corresponds to the median logCPM
abline(h=median(as.matrix(logcounts)), col="blue")

Challenge 2

Redo the heatmap using the top 500 LEAST variable genes. Change the colour scheme to “PiYG” and redo the heatmap. Try ?RColorBrewer and see what other colour schemes are available. Change the sample names to group using the labCol argument Remove the gene names from the righthand side of the plot using labRow

Solution

# Get the gene names for the top 500 least variable genes
lowVar <- order(countVar)[1:500]
# Subset logcounts matrix
hmData <- rlogcounts[lowVar,]
## Get some nicer colours
mypalette <- brewer.pal(11,"PiYG")
## http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3
morecols <- colorRampPalette(mypalette)
# Set up colour vector for celltype variable
col.cell <- c("purple","orange")[sampleinfo$CellType]
# Plot the heatmap
heatmap.2(hmData, 
          col=rev(morecols(50)),
          trace="none", 
          main="Top 500 most variable genes across samples",
          ColSideColors=col.cell,scale="row",
          labCol=sampleinfo$Group, 
          labRow = NA)

Challenge 3

Plot the biased and unbiased MA plots both samples side by side to see the before and after normalisation.

par(mfrow=c(2,2))
plotMA(logcounts, array = 7, main="MCL1.LA - Raw")
abline(h=0,col="grey")
plotMA(logNormalizedCounts, array = 7, main="MCL1.LA - Normalised")
abline(h=0,col="grey")
plotMA(logcounts, array = 11, main="MCL1.LE - Raw")
abline(h=0,col="grey")
plotMA(logNormalizedCounts, array = 11, main="MCL1.LE - Normalised")
abline(h=0,col="grey")

LS0tCnRpdGxlOiAiUk5BLXNlcSBhbmFseXNpcyBpbiBSIgphdXRob3I6ICJTdGVwaGFuZSBCYWxsZXJlYXUsIE1hcmsgRHVubmluZywgT3NjYXIgUnVlZGEsIEFzaGxleSBTYXdsZSIKZGF0ZTogJ2ByIGZvcm1hdChTeXMudGltZSgpLCAiTGFzdCBtb2RpZmllZDogJWQgJWIgJVkiKWAnCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB5ZXMKICAgIHRvY19mbG9hdDogeWVzCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwpsYXlvdXQ6IHBhZ2UKc3VidGl0bGU6IFByZS1wcm9jZXNzc2luZyBSTkEtc2VxIGRhdGEgLSBDaGFsbGVuZ2UgU29sdXRpb25zCi0tLQoKIyMgRGF0YSAKCmBgYHtyIHNldHVwLCBtZXNzYWdlID0gRkFMU0V9CmxpYnJhcnkoREVTZXEyKQpsaWJyYXJ5KGdwbG90cykKbGlicmFyeShSQ29sb3JCcmV3ZXIpCmxpYnJhcnkobGltbWEpCmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCmBgYHtyIHByZXBhcmVEYXRhLCBtZXNzYWdlPUZBTFNFfQojIFJlYWQgdGhlIHNhbXBsZSBpbmZvcm1hdGlvbiBpbnRvIFIKc2FtcGxlaW5mbyA8LSByZWFkLmRlbGltKCIuLi9kYXRhL1NhbXBsZUluZm8uQ29ycmVjdGVkLnR4dCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycyA9IEYpCiMgUmVhZCB0aGUgZGF0YSBpbnRvIFIKc2VxZGF0YSA8LSByZWFkLmRlbGltKCIuLi9kYXRhL0dTRTYwNDUwX0xhY3RhdGlvbi5mZWF0dXJlQ291bnRzIiwgCiAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzID0gRiwKICAgICAgICAgICAgICAgICAgICAgIGNvbW1lbnQgPSAiIyIpCiMgUmVtb3ZlIGZpcnN0IHR3byBjb2x1bW5zIGZyb20gc2VxZGF0YQpjb3VudGRhdGEgPC0gYXMuZGF0YS5mcmFtZShzZXFkYXRhKSAlPiUgCiAgICBjb2x1bW5fdG9fcm93bmFtZXMoIkdlbmVpZCIpICU+JSAjIHR1cm4gdGhlIGdlbmVpZCBjb2x1bW4gaW50byByb3duYW1lcwogICAgcmVuYW1lX2FsbChzdHJfcmVtb3ZlLCAiLmJhbSIpICU+JSAjIHJlbW92ZSB0aGUgIi5iYW0iIGZyb20gdGhlIGNvbHVtbiBuYW1lcwogICAgc2VsZWN0KHNhbXBsZWluZm8kU2FtcGxlKSAlPiUgIyBrZWVwIHNhbXBsZSBjb2x1bW5zIHVzaW5nIHNhbXBsZWluZm8KICAgIGFzLm1hdHJpeCgpCiMgZmlsdGVyIGRhdGEKa2VlcCA8LSByb3dTdW1zKGNvdW50ZGF0YSkgPiA1CmNvdW50ZGF0YSA8LSBjb3VudGRhdGFba2VlcCxdCiMgcmxvZ2NvdW50cwpybG9nY291bnRzIDwtIHJsb2coY291bnRkYXRhKQojIFdlIGVzdGltYXRlIHRoZSB2YXJpYW5jZSBmb3IgZWFjaCByb3cgaW4gdGhlIGxvZ2NvdW50cyBtYXRyaXgKY291bnRWYXIgPC0gYXBwbHkocmxvZ2NvdW50cywgMSwgdmFyKQojIERHRSBsaXN0CmRlc2lnbiA8LSBhcy5mb3JtdWxhKH4gQ2VsbFR5cGUpCiMgY3JlYXRlIHRoZSBERVNlcURhdGFTZXQgb2JqZWN0CmRkc09iaiA8LSBERVNlcURhdGFTZXRGcm9tTWF0cml4KGNvdW50RGF0YSA9IGNvdW50ZGF0YSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29sRGF0YSA9IHNhbXBsZWluZm8sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRlc2lnbiA9IGRlc2lnbikKZGRzT2JqIDwtIGVzdGltYXRlU2l6ZUZhY3RvcnMoZGRzT2JqKQojIG5vcm1hbGlzZSBkY291bnRzCmxvZ2NvdW50cyA8LSBsb2cyKGNvdW50ZGF0YSArIDEpCm5vcm1hbGl6ZWRDb3VudHMgPC0gY291bnRzKGRkc09iaiwgbm9ybWFsaXplZD1UUlVFKSAKbG9nTm9ybWFsaXplZENvdW50cyA8LSBsb2cyKG5vcm1hbGl6ZWRDb3VudHMgKyAxKQpgYGAKCj4gIyMjIENoYWxsZW5nZSAxCj4KPiAxLiBVc2UgdGhlIGBERVNlcTJgIGZ1bmN0aW9uIGBybG9nYCB0byB0cmFuc2Zvcm0gdGhlIGNvdW50IGRhdGEuIFRoaXMgZnVuY3Rpb24KPiBhbHNvIG5vcm1hbGlzZXMgZm9yIGxpYnJhcnkgc2l6ZS4KPiAyLiBQbG90IHRoZSBjb3VudCBkaXN0cmlidXRpb24gYm94cGxvdHMgd2l0aCB0aGlzIGRhdGEKPiBIb3cgaGFzIHRoaXMgZWZmZWN0ZWQgdGhlIGNvdW50IGRpc3RyaWJ1dGlvbnM/CgpgYGB7cn0KcmxvZ2NvdW50cyA8LSBybG9nKGNvdW50ZGF0YSkKCnN0YXR1c0NvbCA8LSBhcy5udW1lcmljKGZhY3RvcihzYW1wbGVpbmZvJFN0YXR1cykpICsgMQojIENoZWNrIGRpc3RyaWJ1dGlvbnMgb2Ygc2FtcGxlcyB1c2luZyBib3hwbG90cwpib3hwbG90KHJsb2djb3VudHMsIAogICAgICAgIHhsYWI9IiIsIAogICAgICAgIHlsYWI9IkxvZzIoQ291bnRzKSIsCiAgICAgICAgbGFzPTIsCiAgICAgICAgY29sPXN0YXR1c0NvbCkKIyBMZXQncyBhZGQgYSBibHVlIGhvcml6b250YWwgbGluZSB0aGF0IGNvcnJlc3BvbmRzIHRvIHRoZSBtZWRpYW4gbG9nQ1BNCmFibGluZShoPW1lZGlhbihhcy5tYXRyaXgobG9nY291bnRzKSksIGNvbD0iYmx1ZSIpCgpgYGAKCj4gIyMjIENoYWxsZW5nZSAyIHsuY2hhbGxlbmdlfQo+Cj4gUmVkbyB0aGUgaGVhdG1hcCB1c2luZyB0aGUgdG9wIDUwMCBMRUFTVCB2YXJpYWJsZSBnZW5lcy4KPiBDaGFuZ2UgdGhlIGNvbG91ciBzY2hlbWUgdG8gIlBpWUciIGFuZCByZWRvIHRoZSBoZWF0bWFwLiBUcnkgYD9SQ29sb3JCcmV3ZXJgIGFuZCBzZWUgd2hhdCBvdGhlciBjb2xvdXIgc2NoZW1lcyBhcmUgYXZhaWxhYmxlLgo+IENoYW5nZSB0aGUgc2FtcGxlIG5hbWVzIHRvIGBncm91cGAgdXNpbmcgdGhlIGBsYWJDb2xgIGFyZ3VtZW50Cj4gUmVtb3ZlIHRoZSBnZW5lIG5hbWVzIGZyb20gdGhlIHJpZ2h0aGFuZCBzaWRlIG9mIHRoZSBwbG90IHVzaW5nIGBsYWJSb3dgCgoqKlNvbHV0aW9uKioKYGBge3Igc29sdXRpb25DaGFsbGVuZ2UyLCBmaWcuaGVpZ2h0PTE1LCBmaWcud2lkdGg9MTB9CgojIEdldCB0aGUgZ2VuZSBuYW1lcyBmb3IgdGhlIHRvcCA1MDAgbGVhc3QgdmFyaWFibGUgZ2VuZXMKbG93VmFyIDwtIG9yZGVyKGNvdW50VmFyKVsxOjUwMF0KIyBTdWJzZXQgbG9nY291bnRzIG1hdHJpeApobURhdGEgPC0gcmxvZ2NvdW50c1tsb3dWYXIsXQoKIyMgR2V0IHNvbWUgbmljZXIgY29sb3VycwpteXBhbGV0dGUgPC0gYnJld2VyLnBhbCgxMSwiUGlZRyIpCiMjIGh0dHA6Ly9jb2xvcmJyZXdlcjIub3JnLyN0eXBlPXNlcXVlbnRpYWwmc2NoZW1lPUJ1R24mbj0zCm1vcmVjb2xzIDwtIGNvbG9yUmFtcFBhbGV0dGUobXlwYWxldHRlKQojIFNldCB1cCBjb2xvdXIgdmVjdG9yIGZvciBjZWxsdHlwZSB2YXJpYWJsZQpjb2wuY2VsbCA8LSBjKCJwdXJwbGUiLCJvcmFuZ2UiKVtzYW1wbGVpbmZvJENlbGxUeXBlXQoKIyBQbG90IHRoZSBoZWF0bWFwCmhlYXRtYXAuMihobURhdGEsIAogICAgICAgICAgY29sPXJldihtb3JlY29scyg1MCkpLAogICAgICAgICAgdHJhY2U9Im5vbmUiLCAKICAgICAgICAgIG1haW49IlRvcCA1MDAgbW9zdCB2YXJpYWJsZSBnZW5lcyBhY3Jvc3Mgc2FtcGxlcyIsCiAgICAgICAgICBDb2xTaWRlQ29sb3JzPWNvbC5jZWxsLHNjYWxlPSJyb3ciLAogICAgICAgICAgbGFiQ29sPXNhbXBsZWluZm8kR3JvdXAsIAogICAgICAgICAgbGFiUm93ID0gTkEpCgpgYGAKCj4gIyMjIENoYWxsZW5nZSAzCj4KPiBQbG90IHRoZSBiaWFzZWQgYW5kIHVuYmlhc2VkIE1BIHBsb3RzIGJvdGggc2FtcGxlcyBzaWRlIGJ5IHNpZGUgdG8gc2VlIHRoZSAKPiBiZWZvcmUgYW5kIGFmdGVyIG5vcm1hbGlzYXRpb24uCj4KCmBgYHtyIHNvbHV0aW9uQ2hhbGxlbmdlMywgZmlnLmhlaWdodD0xMCwgZmlnLndpZHRoPTEwfQpwYXIobWZyb3c9YygyLDIpKQpwbG90TUEobG9nY291bnRzLCBhcnJheSA9IDcsIG1haW49Ik1DTDEuTEEgLSBSYXciKQphYmxpbmUoaD0wLGNvbD0iZ3JleSIpCnBsb3RNQShsb2dOb3JtYWxpemVkQ291bnRzLCBhcnJheSA9IDcsIG1haW49Ik1DTDEuTEEgLSBOb3JtYWxpc2VkIikKYWJsaW5lKGg9MCxjb2w9ImdyZXkiKQpwbG90TUEobG9nY291bnRzLCBhcnJheSA9IDExLCBtYWluPSJNQ0wxLkxFIC0gUmF3IikKYWJsaW5lKGg9MCxjb2w9ImdyZXkiKQpwbG90TUEobG9nTm9ybWFsaXplZENvdW50cywgYXJyYXkgPSAxMSwgbWFpbj0iTUNMMS5MRSAtIE5vcm1hbGlzZWQiKQphYmxpbmUoaD0wLGNvbD0iZ3JleSIpCmBgYA==