Challenge 1
Another common way to rank the genes is to order by pvalue, but also, sorting so that upregulated genes are at start and downregulated at the other - you can do this combining the sign of the fold change and the pvalue. 1. Rank the genes by statisical significance - you will need to create a new ranking value using -log10({p value}) * sign({Fold Change})
2. Load the “C2” pathways from the the data/mouse_c2_v5.RData
file 3. Run fgsea
using the new ranked genes and the C2 pathways 4. Run fgsea
using the new ranked genes and the H pathways. How do these results differ from the ones we got when ranking by the fold change alone?
ranks <- -log10(gseaDat$pvalue) * sign(gseaDat$logFC)
names(ranks) <- gseaDat$Entrez
load("../Robjects/mouse_c2_v5.RData")
pathwaysC2 <- Mm.c2
fgseaResC2 <- fgsea(pathwaysC2, ranks, minSize=15, maxSize = 500, nperm=1000)
There are ties in the preranked stats (0.04% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResC2[order(padj, -abs(NES)), ], n=10)
fgseaResH <- fgsea(pathwaysH, ranks, minSize=15, maxSize = 500, nperm=1000)
There are ties in the preranked stats (0.04% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResH[order(padj, -abs(NES)), ], n=20)
Challenge 2
- Create a gene list for genes that are up-regulated by at least 4x (logFC>2) in lactating mice
- Run a
goseq
analysis on this gene list
- Plot the results
- How is this result different to the previous GO analysis?
Look at genes that are up regulated by at least 4x (logFC>2) in lactating mices
isSigGeneUp <- shrinkLvV$FDR < 0.01 &
!is.na(shrinkLvV$FDR) &
shrinkLvV$logFC > 2
genesUp <- as.integer(isSigGeneUp)
names(genesUp) <- shrinkLvV$GeneID
pwf <- nullp(genesUp, "mm10", "ensGene", bias.data = shrinkLvV$medianTxLength)
initial point very close to some inequality constraints
goResultsUp <- goseq(pwf, "mm10","ensGene", test.cats=c("GO:BP"))
goResultsUp %>%
top_n(10, wt=-over_represented_pvalue) %>%
mutate(hitsPerc=numDEInCat*100/numInCat) %>%
ggplot(aes(x=hitsPerc,
y=term,
colour=over_represented_pvalue,
size=numDEInCat)) +
geom_point() +
expand_limits(x=0) +
labs(x="Hits (%)", y="GO term", colour="p value", size="Count")
Challenge 3
- Use
pathview
to export a figure for “mmu04060”, but this time only use genes that are statistically significant at FDR < 0.01
sigGenes <- shrinkLvV$FDR < 0.01 & !is.na(shrinkLvV$FDR)
logFC <- annotLvV$logFC[sigGenes]
names(logFC) <- annotLvV$Entrez[sigGenes]
pathview(gene.data = logFC,
pathway.id = "mmu04060",
species = "mmu",
limit = list(gene=5, cpd=1))
Info: Downloading xml files for mmu04060, 1/1 pathways..
Info: Downloading png files for mmu04060, 1/1 pathways..
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /Users/sawle01/Documents/RNAseq_course/CRUK_CI_RNAseq_course/Course_Materials/solutions
Info: Writing image file mmu04060.pathview.png
mmu04060.pathview.png:
LS0tCnRpdGxlOiAiUk5BLXNlcSBhbmFseXNpcyBpbiBSIgphdXRob3I6ICJTdGVwaGFuZSBCYWxsZXJlYXUsIE1hcmsgRHVubmluZywgT3NjYXIgUnVlZGEsIEFzaGxleSBTYXdsZSIKZGF0ZTogJ2ByIGZvcm1hdChTeXMudGltZSgpLCAiTGFzdCBtb2RpZmllZDogJWQgJWIgJVkiKWAnCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB5ZXMKICAgIHRvY19mbG9hdDogeWVzCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwptaW51dGVzOiAzMDAKbGF5b3V0OiBwYWdlCnN1YnRpdGxlOiBHZW5lIFNldCBUZXN0aW5nIGZvciBSTkEtc2VxIC0gU29sdXRpb25zCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGZnc2VhKQpsaWJyYXJ5KGdvc2VxKQpsaWJyYXJ5KGNsdXN0ZXJQcm9maWxlcikKbGlicmFyeShwYXRodmlldykKYGBgCgpgYGB7ciBwcmVwYXJlRGF0YSwgaW5jbHVkZT1GQUxTRX0KbG9hZCgiLi4vUm9iamVjdHMvQW5ub3RhdGVkX1Jlc3VsdHNfTHZWLlJEYXRhIikKIyBmZ3NlYSBkYXRhCmdzZWFEYXQgPC0gZmlsdGVyKHNocmlua0x2ViwgIWlzLm5hKEVudHJleikpCnJhbmtzIDwtIGdzZWFEYXQkbG9nRkMKbG9hZCgiLi4vUm9iamVjdHMvbW91c2VfSF92NS5SRGF0YSIpCnBhdGh3YXlzSCA8LSBNbS5ICiMgS2VnZyBkYXRhCnNpZ0dlbmVzIDwtIHNocmlua0x2ViRFbnRyZXpbIHNocmlua0x2ViRGRFIgPCAwLjAxICYgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICFpcy5uYShzaHJpbmtMdlYkRkRSKSAmCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFicyhzaHJpbmtMdlYkbG9nRkMpID4gMSBdCnNpZ0dlbmVzIDwtIG5hLmV4Y2x1ZGUoc2lnR2VuZXMpCmtrIDwtIGVucmljaEtFR0coZ2VuZSA9IHNpZ0dlbmVzLCBvcmdhbmlzbSA9ICdtbXUnKQpgYGAKCj4gIyMgQ2hhbGxlbmdlIDEgey5jaGFsbGVuZ2V9Cj4KPiBBbm90aGVyIGNvbW1vbiB3YXkgdG8gcmFuayB0aGUgZ2VuZXMgaXMgdG8gb3JkZXIgYnkgcHZhbHVlLCBidXQgYWxzbywgc29ydGluZwo+IHNvIHRoYXQgdXByZWd1bGF0ZWQgZ2VuZXMgYXJlIGF0IHN0YXJ0IGFuZCBkb3ducmVndWxhdGVkIGF0IHRoZSBvdGhlciAtIAo+IHlvdSBjYW4gZG8gdGhpcyBjb21iaW5pbmcgdGhlIHNpZ24gb2YgdGhlIGZvbGQgY2hhbmdlIGFuZCB0aGUgcHZhbHVlLgo+IDEuIFJhbmsgdGhlIGdlbmVzIGJ5IHN0YXRpc2ljYWwgc2lnbmlmaWNhbmNlIC0geW91IHdpbGwgbmVlZCB0byBjcmVhdGUKPiBhIG5ldyByYW5raW5nIHZhbHVlIHVzaW5nIGAtbG9nMTAoe3AgdmFsdWV9KSAqIHNpZ24oe0ZvbGQgQ2hhbmdlfSlgCj4gMi4gTG9hZCB0aGUgIkMyIiBwYXRod2F5cyBmcm9tIHRoZSB0aGUgYGRhdGEvbW91c2VfYzJfdjUuUkRhdGFgIGZpbGUKPiAzLiBSdW4gYGZnc2VhYCB1c2luZyB0aGUgbmV3IHJhbmtlZCBnZW5lcyBhbmQgdGhlIEMyIHBhdGh3YXlzCj4gNC4gUnVuIGBmZ3NlYWAgdXNpbmcgdGhlIG5ldyByYW5rZWQgZ2VuZXMgYW5kIHRoZSBIIHBhdGh3YXlzLiBIb3cgZG8gdGhlc2UgCj4gcmVzdWx0cyBkaWZmZXIgZnJvbSB0aGUgb25lcyB3ZSBnb3Qgd2hlbiByYW5raW5nIGJ5IHRoZSBmb2xkIGNoYW5nZSBhbG9uZT8KCmBgYHtyIHNvbHV0aW9uMX0KcmFua3MgPC0gLWxvZzEwKGdzZWFEYXQkcHZhbHVlKSAqIHNpZ24oZ3NlYURhdCRsb2dGQykKbmFtZXMocmFua3MpIDwtIGdzZWFEYXQkRW50cmV6ICAKCmxvYWQoIi4uL1JvYmplY3RzL21vdXNlX2MyX3Y1LlJEYXRhIikKcGF0aHdheXNDMiA8LSBNbS5jMgoKZmdzZWFSZXNDMiA8LSBmZ3NlYShwYXRod2F5c0MyLCByYW5rcywgbWluU2l6ZT0xNSwgbWF4U2l6ZSA9IDUwMCwgbnBlcm09MTAwMCkKaGVhZChmZ3NlYVJlc0MyW29yZGVyKHBhZGosIC1hYnMoTkVTKSksIF0sIG49MTApCgpmZ3NlYVJlc0ggPC0gZmdzZWEocGF0aHdheXNILCByYW5rcywgbWluU2l6ZT0xNSwgbWF4U2l6ZSA9IDUwMCwgbnBlcm09MTAwMCkKaGVhZChmZ3NlYVJlc0hbb3JkZXIocGFkaiwgLWFicyhORVMpKSwgXSwgbj0yMCkKCmBgYAoKPiAjIyBDaGFsbGVuZ2UgMiB7LmNoYWxsZW5nZX0KPgo+IDEuIENyZWF0ZSBhIGdlbmUgbGlzdCBmb3IgZ2VuZXMgdGhhdCBhcmUgdXAtcmVndWxhdGVkIGJ5IGF0IGxlYXN0IDR4IChsb2dGQz4yKQo+IGluIGxhY3RhdGluZyBtaWNlCj4gMi4gUnVuIGEgYGdvc2VxYCBhbmFseXNpcyBvbiB0aGlzIGdlbmUgbGlzdAo+IDMuIFBsb3QgdGhlIHJlc3VsdHMKPiA0LiBIb3cgaXMgdGhpcyByZXN1bHQgZGlmZmVyZW50IHRvIHRoZSBwcmV2aW91cyBHTyBhbmFseXNpcz8KCkxvb2sgYXQgZ2VuZXMgdGhhdCBhcmUgdXAgcmVndWxhdGVkIGJ5IGF0IGxlYXN0IDR4IChsb2dGQz4yKSBpbiBsYWN0YXRpbmcgbWljZXMKCmBgYHtyIHNvbHV0aW9uMiwgbWVzc2FnZT1GQUxTRX0KaXNTaWdHZW5lVXAgPC0gc2hyaW5rTHZWJEZEUiA8IDAuMDEgJgogICAgIWlzLm5hKHNocmlua0x2ViRGRFIpICYKICAgIHNocmlua0x2ViRsb2dGQyA+IDIKCmdlbmVzVXAgPC0gYXMuaW50ZWdlcihpc1NpZ0dlbmVVcCkKbmFtZXMoZ2VuZXNVcCkgPC0gc2hyaW5rTHZWJEdlbmVJRAoKcHdmIDwtIG51bGxwKGdlbmVzVXAsICJtbTEwIiwgImVuc0dlbmUiLCBiaWFzLmRhdGEgPSBzaHJpbmtMdlYkbWVkaWFuVHhMZW5ndGgpCgpnb1Jlc3VsdHNVcCA8LSBnb3NlcShwd2YsICJtbTEwIiwiZW5zR2VuZSIsIHRlc3QuY2F0cz1jKCJHTzpCUCIpKQoKZ29SZXN1bHRzVXAgJT4lCiAgICB0b3BfbigxMCwgd3Q9LW92ZXJfcmVwcmVzZW50ZWRfcHZhbHVlKSAlPiUKICAgIG11dGF0ZShoaXRzUGVyYz1udW1ERUluQ2F0KjEwMC9udW1JbkNhdCkgJT4lIAogICAgZ2dwbG90KGFlcyh4PWhpdHNQZXJjLCAKICAgICAgICAgICAgICAgeT10ZXJtLCAKICAgICAgICAgICAgICAgY29sb3VyPW92ZXJfcmVwcmVzZW50ZWRfcHZhbHVlLCAKICAgICAgICAgICAgICAgc2l6ZT1udW1ERUluQ2F0KSkgKwogICAgICAgIGdlb21fcG9pbnQoKSArCiAgICAgICAgZXhwYW5kX2xpbWl0cyh4PTApICsKICAgICAgICBsYWJzKHg9IkhpdHMgKCUpIiwgeT0iR08gdGVybSIsIGNvbG91cj0icCB2YWx1ZSIsIHNpemU9IkNvdW50IikKYGBgCgo+ICMjIENoYWxsZW5nZSAzIHsuY2hhbGxlbmdlfQo+Cj4gMS4gVXNlIGBwYXRodmlld2AgdG8gZXhwb3J0IGEgZmlndXJlIGZvciAibW11MDQwNjAiLCBidXQgdGhpcyB0aW1lIG9ubHkKPiB1c2UgZ2VuZXMgdGhhdCBhcmUgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCBhdCBGRFIgPCAwLjAxCgpgYGB7ciBzb2x1dGlvbjN9CnNpZ0dlbmVzIDwtIHNocmlua0x2ViRGRFIgPCAwLjAxICYgIWlzLm5hKHNocmlua0x2ViRGRFIpCgpsb2dGQyA8LSBhbm5vdEx2ViRsb2dGQ1tzaWdHZW5lc10KbmFtZXMobG9nRkMpIDwtIGFubm90THZWJEVudHJleltzaWdHZW5lc10KCnBhdGh2aWV3KGdlbmUuZGF0YSA9IGxvZ0ZDLCAKICAgICAgICAgcGF0aHdheS5pZCA9ICJtbXUwNDA2MCIsIAogICAgICAgICBzcGVjaWVzID0gIm1tdSIsIAogICAgICAgICBsaW1pdCA9IGxpc3QoZ2VuZT01LCBjcGQ9MSkpCmBgYAoKbW11MDQwNjAucGF0aHZpZXcucG5nOgoKIVttbXUwNDA2MCAtQ3l0b2tpbmUtY3l0b2tpbmUgcmVjZXB0b3IgaW50ZXJhY3Rpb25dKC4uLy4uL2ltYWdlcy9tbXUwNDA2MC5wYXRodmlldy5wbmcpCgo=