Another common way to rank the genes is to order by pvalue, but also, sorting so that upregulated genes are at start and downregulated at the other - you can do this combining the sign of the fold change and the pvalue.
- Rank the genes by statisical significance - you will need to create a new ranking value using
-log10({p value}) * sign({Fold Change})
- Load the “C2” pathways from the the
data/mouse_c2_v5.RData
file
- Run
fgsea
using the new ranked genes and the C2 pathways
- Run
fgsea
using the new ranked genes and the H pathways. How do these results differ from the ones we got when ranking by the fold change alone?
ranks <- -log10(gseaDat$pvalue) * sign(gseaDat$logFC)
names(ranks) <- gseaDat$Entrez
load("../Robjects/mouse_c2_v5.RData")
pathwaysC2 <- Mm.c2
fgseaResC2 <- fgsea(pathwaysC2, ranks, minSize=15, maxSize = 500, nperm=1000)
## Warning in fgsea(pathwaysC2, ranks, minSize = 15, maxSize = 500, nperm = 1000): There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResC2[order(padj, -abs(NES)), ], n=10)
## pathway
## 1: REACTOME_NONSENSE_MEDIATED_DECAY_ENHANCED_BY_THE_EXON_JUNCTION_COMPLEX
## 2: REACTOME_3_UTR_MEDIATED_TRANSLATIONAL_REGULATION
## 3: REACTOME_INFLUENZA_LIFE_CYCLE
## 4: REACTOME_INFLUENZA_VIRAL_RNA_TRANSCRIPTION_AND_REPLICATION
## 5: REACTOME_PEPTIDE_CHAIN_ELONGATION
## 6: KEGG_RIBOSOME
## 7: REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
## 8: REACTOME_METABOLISM_OF_MRNA
## 9: REACTOME_METABOLISM_OF_RNA
## 10: REACTOME_TRANSLATION
## pval padj ES NES nMoreExtreme size
## 1: 0.002227171 0.03874331 -0.8873458 -2.671111 0 100
## 2: 0.002227171 0.03874331 -0.8855713 -2.665769 0 100
## 3: 0.002257336 0.03874331 -0.8585177 -2.665192 0 128
## 4: 0.002197802 0.03874331 -0.8842923 -2.646739 0 95
## 5: 0.002227171 0.03874331 -0.9083812 -2.646563 0 79
## 6: 0.002207506 0.03874331 -0.8967344 -2.631749 0 81
## 7: 0.002188184 0.03874331 -0.8426403 -2.560901 0 103
## 8: 0.002415459 0.03874331 -0.7740333 -2.541067 0 202
## 9: 0.002457002 0.03874331 -0.7546201 -2.534099 0 244
## 10: 0.002304147 0.03874331 -0.8000658 -2.519187 0 141
## leadingEdge
## 1: 20085,19982,269261,20084,20103,74133,...
## 2: 20085,19982,269261,20084,20103,65019,...
## 3: 20085,19982,269261,20084,20103,65019,...
## 4: 20085,19982,269261,20084,20103,65019,...
## 5: 20085,19982,269261,20084,20103,65019,...
## 6: 20085,19982,269261,20084,20103,65019,...
## 7: 20085,19982,269261,20084,20103,65019,...
## 8: 20085,19982,269261,20084,20103,74133,...
## 9: 20085,19982,269261,20084,20103,74133,...
## 10: 20085,19982,269261,20084,20103,65019,...
fgseaResH <- fgsea(pathwaysH, ranks, minSize=15, maxSize = 500, nperm=1000)
## Warning in fgsea(pathwaysH, ranks, minSize = 15, maxSize = 500, nperm = 1000): There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResH[order(padj, -abs(NES)), ], n=20)
## pathway pval padj ES
## 1: HALLMARK_MYC_TARGETS_V1 0.002288330 0.009534706 -0.6826742
## 2: HALLMARK_OXIDATIVE_PHOSPHORYLATION 0.001776199 0.009534706 0.6475885
## 3: HALLMARK_TGF_BETA_SIGNALING 0.002024291 0.009534706 -0.7419824
## 4: HALLMARK_CHOLESTEROL_HOMEOSTASIS 0.001915709 0.009534706 0.7008853
## 5: HALLMARK_MYC_TARGETS_V2 0.002044990 0.009534706 -0.7011704
## 6: HALLMARK_E2F_TARGETS 0.002288330 0.009534706 -0.5619850
## 7: HALLMARK_ADIPOGENESIS 0.001782531 0.009534706 0.5730449
## 8: HALLMARK_FATTY_ACID_METABOLISM 0.001845018 0.009534706 0.5762348
## 9: HALLMARK_MTORC1_SIGNALING 0.001779359 0.009534706 0.5499793
## 10: HALLMARK_G2M_CHECKPOINT 0.002288330 0.009534706 -0.5058712
## 11: HALLMARK_ESTROGEN_RESPONSE_EARLY 0.002237136 0.009534706 -0.4818514
## 12: HALLMARK_ALLOGRAFT_REJECTION 0.002283105 0.009534706 -0.4604850
## 13: HALLMARK_NOTCH_SIGNALING 0.004115226 0.015827794 -0.6956626
## 14: HALLMARK_PEROXISOME 0.005649718 0.020177563 0.6087801
## 15: HALLMARK_WNT_BETA_CATENIN_SIGNALING 0.006302521 0.021008403 -0.6575068
## 16: HALLMARK_TNFA_SIGNALING_VIA_NFKB 0.009070295 0.028344671 -0.4490023
## 17: HALLMARK_BILE_ACID_METABOLISM 0.013084112 0.036710720 0.5475702
## 18: HALLMARK_DNA_REPAIR 0.013215859 0.036710720 -0.4592440
## 19: HALLMARK_ESTROGEN_RESPONSE_LATE 0.015909091 0.041866029 -0.4157999
## 20: HALLMARK_PROTEIN_SECRETION 0.020715631 0.051789077 0.4990399
## NES nMoreExtreme size
## 1: -2.237411 0 201
## 2: 2.057285 0 196
## 3: -2.029937 0 54
## 4: 1.973205 0 73
## 5: -1.951226 0 58
## 6: -1.841862 0 201
## 7: 1.818820 0 199
## 8: 1.772653 0 155
## 9: 1.749059 0 203
## 10: -1.657953 0 201
## 11: -1.584106 0 193
## 12: -1.508495 0 202
## 13: -1.708253 1 31
## 14: 1.758508 2 95
## 15: -1.681297 2 40
## 16: -1.472893 3 199
## 17: 1.585341 6 98
## 18: -1.457154 5 150
## 19: -1.364333 6 197
## 20: 1.442804 10 96
## leadingEdge
## 1: 23918,20103,12567,27050,20104,14694,...
## 2: 108664,73834,20930,67863,12039,109754,...
## 3: 15901,17131,15903,16997,13805,20482,...
## 4: 170459,16835,208715,63913,319554,235293,...
## 5: 12567,18148,53608,13537,101612,235036,...
## 6: 12704,15374,12567,218210,16201,20492,...
## 7: 12683,227095,13476,18405,72157,12039,...
## 8: 12683,208715,227095,319554,12613,18194,...
## 9: 12450,170459,16835,208715,63913,319554,...
## 10: 15374,17975,12567,16201,105727,27041,...
## 11: 11839,22403,27205,98952,16763,12614,...
## 12: 20085,16156,20005,18148,76846,20091,...
## 13: 18128,109689,74198,15205,21402,56198,...
## 14: 16413,19299,319554,12613,14712,66887,...
## 15: 18128,22419,15213,12006,20602,21414,...
## 16: 11839,12156,16878,18124,13537,14528,...
## 17: 19299,319554,14712,66887,18642,14711,...
## 18: 23918,11636,20016,100042069,21681,69241,...
## 19: 14184,11839,22403,98952,14620,69540,...
## 20: 108664,22319,56382,99371,69938,12176,...
- Create a gene list for genes that are up-regulated by at least 4x (logFC>2) in lactating mice
- Run a
goseq
analysis on this gene list
- Plot the results
- How is this result different to the previous GO analysis?
Look at genes that are up regulated by at least 4x (logFC>2) in lactating mices
isSigGeneUp <- shrinkLvV$FDR < 0.01 &
!is.na(shrinkLvV$FDR) &
shrinkLvV$logFC > 2
genesUp <- as.integer(isSigGeneUp)
names(genesUp) <- shrinkLvV$GeneID
pwf <- nullp(genesUp, "mm10", "ensGene", bias.data = shrinkLvV$medianTxLength)
## Warning in pcls(G): initial point very close to some inequality constraints
goResultsUp <- goseq(pwf, "mm10","ensGene", test.cats=c("GO:BP"))
goResultsUp %>%
top_n(10, wt=-over_represented_pvalue) %>%
mutate(hitsPerc=numDEInCat*100/numInCat) %>%
ggplot(aes(x=hitsPerc,
y=term,
colour=over_represented_pvalue,
size=numDEInCat)) +
geom_point() +
expand_limits(x=0) +
labs(x="Hits (%)", y="GO term", colour="p value", size="Count")
- Use
pathview
to export a figure for “mmu04060”, but this time only use genes that are statistically significant at FDR < 0.01
sigGenes <- shrinkLvV$FDR < 0.01 & !is.na(shrinkLvV$FDR)
logFC <- annotLvV$logFC[sigGenes]
names(logFC) <- annotLvV$Entrez[sigGenes]
pathview(gene.data = logFC,
pathway.id = "mmu04060",
species = "mmu",
limit = list(gene=5, cpd=1))
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /home/sawle01/Documents/training/CRUK_CI_RNAseq_course/Course_Materials/solutions
## Info: Writing image file mmu04060.pathview.png
mmu04060.pathview.png: