Data
library(DESeq2)
library(gplots)
library(RColorBrewer)
library(limma)
library(tidyverse)
# Read the sample information into R
sampleinfo <- read.delim("../data/SampleInfo.Corrected.txt",
stringsAsFactors = F)
# Read the data into R
seqdata <- read.delim("../data/GSE60450_Lactation.featureCounts",
stringsAsFactors = F,
comment = "#")
# Remove first two columns from seqdata
countdata <- as.data.frame(seqdata) %>%
column_to_rownames("Geneid") %>% # turn the geneid column into rownames
rename_all(str_remove, ".bam") %>% # remove the ".bam" from the column names
select(sampleinfo$Sample) %>% # keep sample columns using sampleinfo
as.matrix()
# filter data
keep <- rowSums(countdata) > 5
countdata <- countdata[keep,]
# rlogcounts
rlogcounts <- rlog(countdata)
# We estimate the variance for each row in the logcounts matrix
countVar <- apply(rlogcounts, 1, var)
# DGE list
design <- as.formula(~ CellType)
# create the DESeqDataSet object
ddsObj <- DESeqDataSetFromMatrix(countData = countdata,
colData = sampleinfo,
design = design)
some variables in design formula are characters, converting to factors
ddsObj <- estimateSizeFactors(ddsObj)
# normalise dcounts
logcounts <- log2(countdata + 1)
normalizedCounts <- counts(ddsObj, normalized=TRUE)
logNormalizedCounts <- log2(normalizedCounts + 1)
Challenge 1
- Use the
DESeq2
function rlog
to transform the count data. This function also normalises for library size.
- Plot the count distribution boxplots with this data How has this effected the count distributions?
rlogcounts <- rlog(countdata)
statusCol <- as.numeric(factor(sampleinfo$Status)) + 1
# Check distributions of samples using boxplots
boxplot(rlogcounts,
xlab="",
ylab="Log2(Counts)",
las=2,
col=statusCol)
# Let's add a blue horizontal line that corresponds to the median logCPM
abline(h=median(as.matrix(logcounts)), col="blue")
Challenge 2
Redo the heatmap using the top 500 LEAST variable genes. Change the colour scheme to “PiYG” and redo the heatmap. Try ?RColorBrewer
and see what other colour schemes are available. Change the sample names to group
using the labCol
argument Remove the gene names from the righthand side of the plot using labRow
Solution
# Get the gene names for the top 500 least variable genes
lowVar <- order(countVar)[1:500]
# Subset logcounts matrix
hmData <- rlogcounts[lowVar,]
## Get some nicer colours
mypalette <- brewer.pal(11,"PiYG")
## http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3
morecols <- colorRampPalette(mypalette)
# Set up colour vector for celltype variable
col.cell <- c("purple","orange")[sampleinfo$CellType]
# Plot the heatmap
heatmap.2(hmData,
col=rev(morecols(50)),
trace="none",
main="Top 500 most variable genes across samples",
ColSideColors=col.cell,scale="row",
labCol=sampleinfo$Group,
labRow = NA)
Challenge 3
Plot the biased and unbiased MA plots both samples side by side to see the before and after normalisation.
par(mfrow=c(2,2))
plotMA(logcounts, array = 7, main="MCL1.LA - Raw")
abline(h=0,col="grey")
plotMA(logNormalizedCounts, array = 7, main="MCL1.LA - Normalised")
abline(h=0,col="grey")
plotMA(logcounts, array = 11, main="MCL1.LE - Raw")
abline(h=0,col="grey")
plotMA(logNormalizedCounts, array = 11, main="MCL1.LE - Normalised")
abline(h=0,col="grey")
LS0tCnRpdGxlOiAiUk5BLXNlcSBhbmFseXNpcyBpbiBSIgphdXRob3I6ICJTdGVwaGFuZSBCYWxsZXJlYXUsIE1hcmsgRHVubmluZywgT3NjYXIgUnVlZGEsIEFzaGxleSBTYXdsZSIKZGF0ZTogJ2ByIGZvcm1hdChTeXMudGltZSgpLCAiTGFzdCBtb2RpZmllZDogJWQgJWIgJVkiKWAnCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB5ZXMKICAgIHRvY19mbG9hdDogeWVzCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwpsYXlvdXQ6IHBhZ2UKc3VidGl0bGU6IFByZS1wcm9jZXNzc2luZyBSTkEtc2VxIGRhdGEgLSBDaGFsbGVuZ2UgU29sdXRpb25zCi0tLQoKIyMgRGF0YSAKCmBgYHtyIHNldHVwLCBtZXNzYWdlID0gRkFMU0V9CmxpYnJhcnkoREVTZXEyKQpsaWJyYXJ5KGdwbG90cykKbGlicmFyeShSQ29sb3JCcmV3ZXIpCmxpYnJhcnkobGltbWEpCmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCmBgYHtyIHByZXBhcmVEYXRhLCBtZXNzYWdlPUZBTFNFfQojIFJlYWQgdGhlIHNhbXBsZSBpbmZvcm1hdGlvbiBpbnRvIFIKc2FtcGxlaW5mbyA8LSByZWFkLmRlbGltKCIuLi9kYXRhL1NhbXBsZUluZm8uQ29ycmVjdGVkLnR4dCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycyA9IEYpCiMgUmVhZCB0aGUgZGF0YSBpbnRvIFIKc2VxZGF0YSA8LSByZWFkLmRlbGltKCIuLi9kYXRhL0dTRTYwNDUwX0xhY3RhdGlvbi5mZWF0dXJlQ291bnRzIiwgCiAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzID0gRiwKICAgICAgICAgICAgICAgICAgICAgIGNvbW1lbnQgPSAiIyIpCiMgUmVtb3ZlIGZpcnN0IHR3byBjb2x1bW5zIGZyb20gc2VxZGF0YQpjb3VudGRhdGEgPC0gYXMuZGF0YS5mcmFtZShzZXFkYXRhKSAlPiUgCiAgICBjb2x1bW5fdG9fcm93bmFtZXMoIkdlbmVpZCIpICU+JSAjIHR1cm4gdGhlIGdlbmVpZCBjb2x1bW4gaW50byByb3duYW1lcwogICAgcmVuYW1lX2FsbChzdHJfcmVtb3ZlLCAiLmJhbSIpICU+JSAjIHJlbW92ZSB0aGUgIi5iYW0iIGZyb20gdGhlIGNvbHVtbiBuYW1lcwogICAgc2VsZWN0KHNhbXBsZWluZm8kU2FtcGxlKSAlPiUgIyBrZWVwIHNhbXBsZSBjb2x1bW5zIHVzaW5nIHNhbXBsZWluZm8KICAgIGFzLm1hdHJpeCgpCiMgZmlsdGVyIGRhdGEKa2VlcCA8LSByb3dTdW1zKGNvdW50ZGF0YSkgPiA1CmNvdW50ZGF0YSA8LSBjb3VudGRhdGFba2VlcCxdCiMgcmxvZ2NvdW50cwpybG9nY291bnRzIDwtIHJsb2coY291bnRkYXRhKQojIFdlIGVzdGltYXRlIHRoZSB2YXJpYW5jZSBmb3IgZWFjaCByb3cgaW4gdGhlIGxvZ2NvdW50cyBtYXRyaXgKY291bnRWYXIgPC0gYXBwbHkocmxvZ2NvdW50cywgMSwgdmFyKQojIERHRSBsaXN0CmRlc2lnbiA8LSBhcy5mb3JtdWxhKH4gQ2VsbFR5cGUpCiMgY3JlYXRlIHRoZSBERVNlcURhdGFTZXQgb2JqZWN0CmRkc09iaiA8LSBERVNlcURhdGFTZXRGcm9tTWF0cml4KGNvdW50RGF0YSA9IGNvdW50ZGF0YSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29sRGF0YSA9IHNhbXBsZWluZm8sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRlc2lnbiA9IGRlc2lnbikKZGRzT2JqIDwtIGVzdGltYXRlU2l6ZUZhY3RvcnMoZGRzT2JqKQojIG5vcm1hbGlzZSBkY291bnRzCmxvZ2NvdW50cyA8LSBsb2cyKGNvdW50ZGF0YSArIDEpCm5vcm1hbGl6ZWRDb3VudHMgPC0gY291bnRzKGRkc09iaiwgbm9ybWFsaXplZD1UUlVFKSAKbG9nTm9ybWFsaXplZENvdW50cyA8LSBsb2cyKG5vcm1hbGl6ZWRDb3VudHMgKyAxKQpgYGAKCj4gIyMjIENoYWxsZW5nZSAxCj4KPiAxLiBVc2UgdGhlIGBERVNlcTJgIGZ1bmN0aW9uIGBybG9nYCB0byB0cmFuc2Zvcm0gdGhlIGNvdW50IGRhdGEuIFRoaXMgZnVuY3Rpb24KPiBhbHNvIG5vcm1hbGlzZXMgZm9yIGxpYnJhcnkgc2l6ZS4KPiAyLiBQbG90IHRoZSBjb3VudCBkaXN0cmlidXRpb24gYm94cGxvdHMgd2l0aCB0aGlzIGRhdGEKPiBIb3cgaGFzIHRoaXMgZWZmZWN0ZWQgdGhlIGNvdW50IGRpc3RyaWJ1dGlvbnM/CgpgYGB7cn0KcmxvZ2NvdW50cyA8LSBybG9nKGNvdW50ZGF0YSkKCnN0YXR1c0NvbCA8LSBhcy5udW1lcmljKGZhY3RvcihzYW1wbGVpbmZvJFN0YXR1cykpICsgMQojIENoZWNrIGRpc3RyaWJ1dGlvbnMgb2Ygc2FtcGxlcyB1c2luZyBib3hwbG90cwpib3hwbG90KHJsb2djb3VudHMsIAogICAgICAgIHhsYWI9IiIsIAogICAgICAgIHlsYWI9IkxvZzIoQ291bnRzKSIsCiAgICAgICAgbGFzPTIsCiAgICAgICAgY29sPXN0YXR1c0NvbCkKIyBMZXQncyBhZGQgYSBibHVlIGhvcml6b250YWwgbGluZSB0aGF0IGNvcnJlc3BvbmRzIHRvIHRoZSBtZWRpYW4gbG9nQ1BNCmFibGluZShoPW1lZGlhbihhcy5tYXRyaXgobG9nY291bnRzKSksIGNvbD0iYmx1ZSIpCgpgYGAKCj4gIyMjIENoYWxsZW5nZSAyIHsuY2hhbGxlbmdlfQo+Cj4gUmVkbyB0aGUgaGVhdG1hcCB1c2luZyB0aGUgdG9wIDUwMCBMRUFTVCB2YXJpYWJsZSBnZW5lcy4KPiBDaGFuZ2UgdGhlIGNvbG91ciBzY2hlbWUgdG8gIlBpWUciIGFuZCByZWRvIHRoZSBoZWF0bWFwLiBUcnkgYD9SQ29sb3JCcmV3ZXJgIGFuZCBzZWUgd2hhdCBvdGhlciBjb2xvdXIgc2NoZW1lcyBhcmUgYXZhaWxhYmxlLgo+IENoYW5nZSB0aGUgc2FtcGxlIG5hbWVzIHRvIGBncm91cGAgdXNpbmcgdGhlIGBsYWJDb2xgIGFyZ3VtZW50Cj4gUmVtb3ZlIHRoZSBnZW5lIG5hbWVzIGZyb20gdGhlIHJpZ2h0aGFuZCBzaWRlIG9mIHRoZSBwbG90IHVzaW5nIGBsYWJSb3dgCgoqKlNvbHV0aW9uKioKYGBge3Igc29sdXRpb25DaGFsbGVuZ2UyLCBmaWcuaGVpZ2h0PTE1LCBmaWcud2lkdGg9MTB9CgojIEdldCB0aGUgZ2VuZSBuYW1lcyBmb3IgdGhlIHRvcCA1MDAgbGVhc3QgdmFyaWFibGUgZ2VuZXMKbG93VmFyIDwtIG9yZGVyKGNvdW50VmFyKVsxOjUwMF0KIyBTdWJzZXQgbG9nY291bnRzIG1hdHJpeApobURhdGEgPC0gcmxvZ2NvdW50c1tsb3dWYXIsXQoKIyMgR2V0IHNvbWUgbmljZXIgY29sb3VycwpteXBhbGV0dGUgPC0gYnJld2VyLnBhbCgxMSwiUGlZRyIpCiMjIGh0dHA6Ly9jb2xvcmJyZXdlcjIub3JnLyN0eXBlPXNlcXVlbnRpYWwmc2NoZW1lPUJ1R24mbj0zCm1vcmVjb2xzIDwtIGNvbG9yUmFtcFBhbGV0dGUobXlwYWxldHRlKQojIFNldCB1cCBjb2xvdXIgdmVjdG9yIGZvciBjZWxsdHlwZSB2YXJpYWJsZQpjb2wuY2VsbCA8LSBjKCJwdXJwbGUiLCJvcmFuZ2UiKVtzYW1wbGVpbmZvJENlbGxUeXBlXQoKIyBQbG90IHRoZSBoZWF0bWFwCmhlYXRtYXAuMihobURhdGEsIAogICAgICAgICAgY29sPXJldihtb3JlY29scyg1MCkpLAogICAgICAgICAgdHJhY2U9Im5vbmUiLCAKICAgICAgICAgIG1haW49IlRvcCA1MDAgbW9zdCB2YXJpYWJsZSBnZW5lcyBhY3Jvc3Mgc2FtcGxlcyIsCiAgICAgICAgICBDb2xTaWRlQ29sb3JzPWNvbC5jZWxsLHNjYWxlPSJyb3ciLAogICAgICAgICAgbGFiQ29sPXNhbXBsZWluZm8kR3JvdXAsIAogICAgICAgICAgbGFiUm93ID0gTkEpCgpgYGAKCj4gIyMjIENoYWxsZW5nZSAzCj4KPiBQbG90IHRoZSBiaWFzZWQgYW5kIHVuYmlhc2VkIE1BIHBsb3RzIGJvdGggc2FtcGxlcyBzaWRlIGJ5IHNpZGUgdG8gc2VlIHRoZSAKPiBiZWZvcmUgYW5kIGFmdGVyIG5vcm1hbGlzYXRpb24uCj4KCmBgYHtyIHNvbHV0aW9uQ2hhbGxlbmdlMywgZmlnLmhlaWdodD0xMCwgZmlnLndpZHRoPTEwfQpwYXIobWZyb3c9YygyLDIpKQpwbG90TUEobG9nY291bnRzLCBhcnJheSA9IDcsIG1haW49Ik1DTDEuTEEgLSBSYXciKQphYmxpbmUoaD0wLGNvbD0iZ3JleSIpCnBsb3RNQShsb2dOb3JtYWxpemVkQ291bnRzLCBhcnJheSA9IDcsIG1haW49Ik1DTDEuTEEgLSBOb3JtYWxpc2VkIikKYWJsaW5lKGg9MCxjb2w9ImdyZXkiKQpwbG90TUEobG9nY291bnRzLCBhcnJheSA9IDExLCBtYWluPSJNQ0wxLkxFIC0gUmF3IikKYWJsaW5lKGg9MCxjb2w9ImdyZXkiKQpwbG90TUEobG9nTm9ybWFsaXplZENvdW50cywgYXJyYXkgPSAxMSwgbWFpbj0iTUNMMS5MRSAtIE5vcm1hbGlzZWQiKQphYmxpbmUoaD0wLGNvbD0iZ3JleSIpCmBgYA==