Challenge 1
Another common way to rank the genes is to order by pvalue, but also, sorting so that upregulated genes are at start and downregulated at the other - you can do this combining the sign of the fold change and the pvalue. 1. Rank the genes by statisical significance - you will need to create a new ranking value using
-log10({p value}) * sign({Fold Change})
2. Load the “C2” pathways from the thedata/mouse_c2_v5.RData
file 3. Runfgsea
using the new ranked genes and the C2 pathways 4. Runfgsea
using the new ranked genes and the H pathways. How do these results differ from the ones we got when ranking by the fold change alone?
ranks <- -log10(gseaDat$pvalue) * sign(gseaDat$logFC)
names(ranks) <- gseaDat$Entrez
load("../Robjects/mouse_c2_v5.RData")
pathwaysC2 <- Mm.c2
fgseaResC2 <- fgsea(pathwaysC2, ranks, minSize=15, maxSize = 500, nperm=1000)
## Warning in fgsea(pathwaysC2, ranks, minSize = 15, maxSize = 500, nperm = 1000): There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResC2[order(padj, -abs(NES)), ], n=10)
## pathway
## 1: REACTOME_INFLUENZA_LIFE_CYCLE
## 2: REACTOME_NONSENSE_MEDIATED_DECAY_ENHANCED_BY_THE_EXON_JUNCTION_COMPLEX
## 3: REACTOME_3_UTR_MEDIATED_TRANSLATIONAL_REGULATION
## 4: REACTOME_PEPTIDE_CHAIN_ELONGATION
## 5: REACTOME_INFLUENZA_VIRAL_RNA_TRANSCRIPTION_AND_REPLICATION
## 6: KEGG_RIBOSOME
## 7: REACTOME_METABOLISM_OF_RNA
## 8: REACTOME_METABOLISM_OF_MRNA
## 9: REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
## 10: REACTOME_TRANSLATION
## pval padj ES NES nMoreExtreme size
## 1: 0.002267574 0.05313355 -0.8585177 -2.673167 0 128
## 2: 0.002331002 0.05313355 -0.8873458 -2.649677 0 100
## 3: 0.002331002 0.05313355 -0.8855713 -2.644379 0 100
## 4: 0.002132196 0.05313355 -0.9083812 -2.643275 0 79
## 5: 0.002314815 0.05313355 -0.8842923 -2.631870 0 95
## 6: 0.002136752 0.05313355 -0.8967344 -2.613460 0 81
## 7: 0.002320186 0.05313355 -0.7546201 -2.555758 0 244
## 8: 0.002403846 0.05313355 -0.7740333 -2.552145 0 202
## 9: 0.002320186 0.05313355 -0.8426403 -2.540986 0 103
## 10: 0.002257336 0.05313355 -0.8000658 -2.521469 0 141
## leadingEdge
## 1: 20085,19982,269261,20084,20103,65019,...
## 2: 20085,19982,269261,20084,20103,74133,...
## 3: 20085,19982,269261,20084,20103,65019,...
## 4: 20085,19982,269261,20084,20103,65019,...
## 5: 20085,19982,269261,20084,20103,65019,...
## 6: 20085,19982,269261,20084,20103,65019,...
## 7: 20085,19982,269261,20084,20103,74133,...
## 8: 20085,19982,269261,20084,20103,74133,...
## 9: 20085,19982,269261,20084,20103,65019,...
## 10: 20085,19982,269261,20084,20103,65019,...
fgseaResH <- fgsea(pathwaysH, ranks, minSize=15, maxSize = 500, nperm=1000)
## Warning in fgsea(pathwaysH, ranks, minSize = 15, maxSize = 500, nperm = 1000): There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResH[order(padj, -abs(NES)), ], n=20)
## pathway pval padj ES
## 1: HALLMARK_MYC_TARGETS_V1 0.002232143 0.008585165 -0.6826742
## 2: HALLMARK_OXIDATIVE_PHOSPHORYLATION 0.001834862 0.008585165 0.6475885
## 3: HALLMARK_TGF_BETA_SIGNALING 0.002100840 0.008585165 -0.7419824
## 4: HALLMARK_MYC_TARGETS_V2 0.002109705 0.008585165 -0.7011704
## 5: HALLMARK_CHOLESTEROL_HOMEOSTASIS 0.001964637 0.008585165 0.7008853
## 6: HALLMARK_E2F_TARGETS 0.002232143 0.008585165 -0.5619850
## 7: HALLMARK_ADIPOGENESIS 0.001824818 0.008585165 0.5730449
## 8: HALLMARK_FATTY_ACID_METABOLISM 0.001872659 0.008585165 0.5762348
## 9: HALLMARK_MTORC1_SIGNALING 0.001818182 0.008585165 0.5499793
## 10: HALLMARK_WNT_BETA_CATENIN_SIGNALING 0.002178649 0.008585165 -0.6575068
## 11: HALLMARK_G2M_CHECKPOINT 0.002232143 0.008585165 -0.5058712
## 12: HALLMARK_ESTROGEN_RESPONSE_EARLY 0.002197802 0.008585165 -0.4818514
## 13: HALLMARK_ALLOGRAFT_REJECTION 0.002188184 0.008585165 -0.4604850
## 14: HALLMARK_PEROXISOME 0.003824092 0.013657471 0.6087801
## 15: HALLMARK_NOTCH_SIGNALING 0.006263048 0.020649780 -0.6956626
## 16: HALLMARK_TNFA_SIGNALING_VIA_NFKB 0.006607930 0.020649780 -0.4490023
## 17: HALLMARK_BILE_ACID_METABOLISM 0.009541985 0.028064661 0.5475702
## 18: HALLMARK_DNA_REPAIR 0.012820513 0.035612536 -0.4592440
## 19: HALLMARK_ESTROGEN_RESPONSE_LATE 0.026431718 0.069557153 -0.4157999
## 20: HALLMARK_PROTEIN_SECRETION 0.032755299 0.081888247 0.4990399
## NES nMoreExtreme size
## 1: -2.234988 0 201
## 2: 2.064359 0 196
## 3: -2.058291 0 54
## 4: -1.964133 0 58
## 5: 1.936817 0 73
## 6: -1.839867 0 201
## 7: 1.829935 0 199
## 8: 1.771772 0 155
## 9: 1.758359 0 203
## 10: -1.709298 0 40
## 11: -1.656157 0 201
## 12: -1.568230 0 193
## 13: -1.514062 0 202
## 14: 1.760625 1 95
## 15: -1.733122 2 31
## 16: -1.471610 2 199
## 17: 1.585845 4 98
## 18: -1.449557 5 150
## 19: -1.359037 11 197
## 20: 1.437826 16 96
## leadingEdge
## 1: 23918,20103,12567,27050,20104,14694,...
## 2: 108664,73834,20930,67863,12039,109754,...
## 3: 15901,17131,15903,16997,13805,20482,...
## 4: 12567,18148,53608,13537,101612,235036,...
## 5: 170459,16835,208715,63913,319554,235293,...
## 6: 12704,15374,12567,218210,16201,20492,...
## 7: 12683,227095,13476,18405,72157,12039,...
## 8: 12683,208715,227095,319554,12613,18194,...
## 9: 12450,170459,16835,208715,63913,319554,...
## 10: 18128,22419,15213,12006,20602,21414,...
## 11: 15374,17975,12567,16201,105727,27041,...
## 12: 11839,22403,27205,98952,16763,12614,...
## 13: 20085,16156,20005,18148,76846,20091,...
## 14: 16413,19299,319554,12613,14712,66887,...
## 15: 18128,109689,74198,15205,21402,56198,...
## 16: 11839,12156,16878,18124,13537,14528,...
## 17: 19299,319554,14712,66887,18642,14711,...
## 18: 23918,11636,20016,100042069,21681,69241,...
## 19: 14184,11839,22403,98952,14620,69540,...
## 20: 108664,22319,56382,99371,69938,12176,...
Challenge 2
- Create a gene list for genes that are up-regulated by at least 4x (logFC>2) in lactating mice
- Run a
goseq
analysis on this gene list- Plot the results
- How is this result different to the previous GO analysis?
Look at genes that are up regulated by at least 4x (logFC>2) in lactating mices
isSigGeneUp <- shrinkLvV$FDR < 0.01 &
!is.na(shrinkLvV$FDR) &
shrinkLvV$logFC > 2
genesUp <- as.integer(isSigGeneUp)
names(genesUp) <- shrinkLvV$GeneID
pwf <- nullp(genesUp, "mm10", "ensGene", bias.data = shrinkLvV$medianTxLength)
## Warning in pcls(G): initial point very close to some inequality constraints
goResultsUp <- goseq(pwf, "mm10","ensGene", test.cats=c("GO:BP"))
goResultsUp %>%
top_n(10, wt=-over_represented_pvalue) %>%
mutate(hitsPerc=numDEInCat*100/numInCat) %>%
ggplot(aes(x=hitsPerc,
y=term,
colour=over_represented_pvalue,
size=numDEInCat)) +
geom_point() +
expand_limits(x=0) +
labs(x="Hits (%)", y="GO term", colour="p value", size="Count")
Challenge 3
- Use
pathview
to export a figure for “mmu04060”, but this time only use genes that are statistically significant at FDR < 0.01
sigGenes <- shrinkLvV$FDR < 0.01 & !is.na(shrinkLvV$FDR)
logFC <- annotLvV$logFC[sigGenes]
names(logFC) <- annotLvV$Entrez[sigGenes]
pathview(gene.data = logFC,
pathway.id = "mmu04060",
species = "mmu",
limit = list(gene=5, cpd=1))
## Info: Downloading xml files for mmu04060, 1/1 pathways..
## Info: Downloading png files for mmu04060, 1/1 pathways..
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /home/participant/Documents/CRUK_CI_RNAseq_course/Course_Materials/solutions
## Info: Writing image file mmu04060.pathview.png
mmu04060.pathview.png: