Another common way to rank the genes is to order by pvalue, but also, sorting so that upregulated genes are at start and downregulated at the other - you can do this combining the sign of the fold change and the pvalue.
- Rank the genes by statisical significance - you will need to create a new ranking value using
-log10({p value}) * sign({Fold Change})
- Load the “C2” pathways from the the
data/mouse_c2_v5.RData
file
- Run
fgsea
using the new ranked genes and the C2 pathways
- Run
fgsea
using the new ranked genes and the H pathways. How do these results differ from the ones we got when ranking by the fold change alone?
ranks <- -log10(gseaDat$pvalue) * sign(gseaDat$logFC) # or gseaDat$stat
names(ranks) <- gseaDat$Entrez
load("../Robjects/mouse_c2_v5.RData")
pathwaysC2 <- Mm.c2
fgseaResC2 <- fgsea(pathwaysC2, ranks, minSize=15, maxSize = 500, nperm=1000)
## Warning in fgsea(pathwaysC2, ranks, minSize = 15, maxSize = 500, nperm = 1000):
## You are trying to run fgseaSimple. It is recommended to use fgseaMultilevel. To
## run fgseaMultilevel, you need to remove the nperm argument in the fgsea function
## call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResC2[order(padj, -abs(NES)), ], n=10)
## pathway
## 1: REACTOME_INFLUENZA_LIFE_CYCLE
## 2: REACTOME_NONSENSE_MEDIATED_DECAY_ENHANCED_BY_THE_EXON_JUNCTION_COMPLEX
## 3: REACTOME_3_UTR_MEDIATED_TRANSLATIONAL_REGULATION
## 4: REACTOME_INFLUENZA_VIRAL_RNA_TRANSCRIPTION_AND_REPLICATION
## 5: REACTOME_PEPTIDE_CHAIN_ELONGATION
## 6: KEGG_RIBOSOME
## 7: REACTOME_METABOLISM_OF_MRNA
## 8: REACTOME_METABOLISM_OF_RNA
## 9: REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
## 10: REACTOME_TRANSLATION
## pval padj ES NES nMoreExtreme size
## 1: 0.002277904 0.0508366 -0.8585177 -2.662652 0 128
## 2: 0.002309469 0.0508366 -0.8873458 -2.643421 0 100
## 3: 0.002309469 0.0508366 -0.8855713 -2.638135 0 100
## 4: 0.002257336 0.0508366 -0.8842923 -2.623054 0 95
## 5: 0.002304147 0.0508366 -0.9083812 -2.619290 0 79
## 6: 0.002320186 0.0508366 -0.8967344 -2.600916 0 81
## 7: 0.002386635 0.0508366 -0.7740333 -2.522087 0 202
## 8: 0.002444988 0.0508366 -0.7546201 -2.515622 0 244
## 9: 0.002336449 0.0508366 -0.8426403 -2.514598 0 103
## 10: 0.002320186 0.0508366 -0.8000658 -2.487740 0 141
## leadingEdge
## 1: 20085,19982,269261,20084,20103,65019,...
## 2: 20085,19982,269261,20084,20103,74133,...
## 3: 20085,19982,269261,20084,20103,65019,...
## 4: 20085,19982,269261,20084,20103,65019,...
## 5: 20085,19982,269261,20084,20103,65019,...
## 6: 20085,19982,269261,20084,20103,65019,...
## 7: 20085,19982,269261,20084,20103,74133,...
## 8: 20085,19982,269261,20084,20103,74133,...
## 9: 20085,19982,269261,20084,20103,65019,...
## 10: 20085,19982,269261,20084,20103,65019,...
fgseaResH <- fgsea(pathwaysH, ranks, minSize=15, maxSize = 500, nperm=1000)
## Warning in fgsea(pathwaysH, ranks, minSize = 15, maxSize = 500, nperm = 1000): You are trying to run fgseaSimple. It is recommended to use fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument in the fgsea function call.
## Warning in fgsea(pathwaysH, ranks, minSize = 15, maxSize = 500, nperm = 1000): There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResH[order(padj, -abs(NES)), ], n=20)
## pathway pval padj ES
## 1: HALLMARK_MYC_TARGETS_V1 0.002277904 0.01052189 -0.6826742
## 2: HALLMARK_OXIDATIVE_PHOSPHORYLATION 0.001748252 0.01052189 0.6475885
## 3: HALLMARK_TGF_BETA_SIGNALING 0.002114165 0.01052189 -0.7419824
## 4: HALLMARK_CHOLESTEROL_HOMEOSTASIS 0.001821494 0.01052189 0.7008853
## 5: HALLMARK_MYC_TARGETS_V2 0.002066116 0.01052189 -0.7011704
## 6: HALLMARK_E2F_TARGETS 0.002277904 0.01052189 -0.5619850
## 7: HALLMARK_ADIPOGENESIS 0.001745201 0.01052189 0.5730449
## 8: HALLMARK_MTORC1_SIGNALING 0.001751313 0.01052189 0.5499793
## 9: HALLMARK_PEROXISOME 0.001845018 0.01052189 0.6087801
## 10: HALLMARK_G2M_CHECKPOINT 0.002277904 0.01052189 -0.5058712
## 11: HALLMARK_ESTROGEN_RESPONSE_EARLY 0.002314815 0.01052189 -0.4818514
## 12: HALLMARK_FATTY_ACID_METABOLISM 0.003663004 0.01526252 0.5762348
## 13: HALLMARK_BILE_ACID_METABOLISM 0.005504587 0.02117149 0.5475702
## 14: HALLMARK_ALLOGRAFT_REJECTION 0.006849315 0.02446184 -0.4604850
## 15: HALLMARK_WNT_BETA_CATENIN_SIGNALING 0.008421053 0.02631579 -0.6575068
## 16: HALLMARK_NOTCH_SIGNALING 0.008179959 0.02631579 -0.6956626
## 17: HALLMARK_DNA_REPAIR 0.010989011 0.03232062 -0.4592440
## 18: HALLMARK_TNFA_SIGNALING_VIA_NFKB 0.011655012 0.03237503 -0.4490023
## 19: HALLMARK_ESTROGEN_RESPONSE_LATE 0.021176471 0.05572755 -0.4157999
## 20: HALLMARK_PROTEIN_SECRETION 0.025878004 0.06469501 0.4990399
## NES nMoreExtreme size leadingEdge
## 1: -2.264405 0 201 23918,20103,12567,27050,20104,14694,...
## 2: 2.066799 0 196 108664,73834,20930,67863,12039,109754,...
## 3: -2.009290 0 54 15901,17131,15903,16997,13805,20482,...
## 4: 1.947338 0 73 170459,16835,208715,63913,319554,235293,...
## 5: -1.943605 0 58 12567,18148,53608,13537,101612,235036,...
## 6: -1.864084 0 201 12704,15374,12567,218210,16201,20492,...
## 7: 1.832673 0 199 12683,227095,13476,18405,72157,12039,...
## 8: 1.763742 0 203 12450,170459,16835,208715,63913,319554,...
## 9: 1.760775 0 95 16413,19299,319554,12613,14712,66887,...
## 10: -1.677956 0 201 15374,17975,12567,16201,105727,27041,...
## 11: -1.584557 0 193 11839,22403,27205,98952,16763,12614,...
## 12: 1.764785 1 155 12683,208715,227095,319554,12613,18194,...
## 13: 1.594234 2 98 19299,319554,14712,66887,18642,14711,...
## 14: -1.525955 2 202 20085,16156,20005,18148,76846,20091,...
## 15: -1.693331 3 40 18128,22419,15213,12006,20602,21414,...
## 16: -1.693149 3 31 18128,109689,74198,15205,21402,56198,...
## 17: -1.451626 4 150 23918,11636,20016,100042069,21681,69241,...
## 18: -1.481846 4 199 11839,12156,16878,18124,13537,14528,...
## 19: -1.369648 8 197 14184,11839,22403,98952,14620,69540,...
## 20: 1.445366 13 96 108664,22319,56382,99371,69938,12176,...
- Create a gene list for genes that are up-regulated by at least 4x (logFC>2) in lactating mice
- Run a
goseq
analysis on this gene list
- Plot the results
- How is this result different to the previous GO analysis?
Look at genes that are up regulated by at least 4x (logFC>2) in lactating mices
isSigGeneUp <- shrinkLvV$FDR < 0.01 &
!is.na(shrinkLvV$FDR) &
shrinkLvV$logFC > 2
genesUp <- as.integer(isSigGeneUp)
names(genesUp) <- shrinkLvV$GeneID
pwf <- nullp(genesUp, "mm10", "ensGene", bias.data = shrinkLvV$medianTxLength)
## Warning in pcls(G): initial point very close to some inequality constraints
goResultsUp <- goseq(pwf, "mm10","ensGene", test.cats=c("GO:BP"))
goResultsUp %>%
top_n(10, wt=-over_represented_pvalue) %>%
mutate(hitsPerc=numDEInCat*100/numInCat) %>%
ggplot(aes(x=hitsPerc,
y=term,
colour=over_represented_pvalue,
size=numDEInCat)) +
geom_point() +
expand_limits(x=0) +
labs(x="Hits (%)", y="GO term", colour="p value", size="Count")
- Use
pathview
to export a figure for “mmu04060”, but this time only use genes that are statistically significant at FDR < 0.01
sigGenes <- shrinkLvV$FDR < 0.01 & !is.na(shrinkLvV$FDR)
logFC <- shrinkLvV$logFC[sigGenes]
names(logFC) <- shrinkLvV$Entrez[sigGenes]
pathview(gene.data = logFC,
pathway.id = "mmu04060",
species = "mmu",
limit = list(gene=5, cpd=1))
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /home/ubuntu/Course_Materials/solutions
## Info: Writing image file mmu04060.pathview.png
mmu04060.pathview.png: