- Use
pathview
to export a figure for "mmu04060", but this time only use genes that are statistically significant at FDR < 0.01
sigGenes <- shrinkLvV$FDR < 0.01 & !is.na(shrinkLvV$FDR)
logFC <- shrinkLvV$logFC[sigGenes]
names(logFC) <- shrinkLvV$Entrez[sigGenes]
pathview(gene.data = logFC,
pathway.id = "mmu04060",
species = "mmu",
limit = list(gene=5, cpd=1))
## Loading required namespace: org.Mm.eg.db
##
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /home/cri.camres.org/sawle01/Documents/training/RNAseq_Summer_School_2020/Course_Materials/solutions
## Info: Writing image file mmu04060.pathview.png
mmu04060.pathview.png:
Another common way to rank the genes is to order by pvalue, but also, sorting so that upregulated genes are at start and downregulated at the other - you can do this combining the sign of the fold change and the pvalue.
- Rank the genes by statisical significance - you will need to create a new ranking value using
-log10({p value}) * sign({Fold Change})
- Load the "C2" pathways from the the
data/mouse_c2_v5.RData
file
- Run
fgsea
using the new ranked genes and the C2 pathways
- Run
fgsea
using the new ranked genes and the H pathways. How do these results differ from the ones we got when ranking by the fold change alone?
# 1.
ranks <- -log10(gseaDat$pvalue) * sign(gseaDat$logFC) # or gseaDat$stat
names(ranks) <- gseaDat$Entrez
head(ranks)
## 497097 19888 20671 27395 18777 21399
## 0.4482109 -1.6181302 -1.0403297 2.4595958 5.4036283 -1.0508420
# 2. Load the object Mm.C2
load("Robjects/mouse_c2_v5.RData")
head(names(Mm.c2))
## [1] "KEGG_GLYCOLYSIS_GLUCONEOGENESIS"
## [2] "KEGG_CITRATE_CYCLE_TCA_CYCLE"
## [3] "KEGG_PENTOSE_PHOSPHATE_PATHWAY"
## [4] "KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS"
## [5] "KEGG_FRUCTOSE_AND_MANNOSE_METABOLISM"
## [6] "KEGG_GALACTOSE_METABOLISM"
# 3.
fgseaResC2 <- fgsea(Mm.c2, ranks, minSize=15, maxSize = 500)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaResC2 %>%
arrange(padj, desc(abs(NES))) %>%
top_n(10, -padj) %>%
as_tibble()
## # A tibble: 16 x 8
## pathway pval padj log2err ES NES size leadingEdge
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <list>
## 1 REACTOME_INFLUENZA_… 1.00e-10 2.28e-8 NA -0.859 -2.68 128 <chr [80]>
## 2 REACTOME_NONSENSE_M… 1.00e-10 2.28e-8 NA -0.887 -2.64 100 <chr [74]>
## 3 REACTOME_3_UTR_MEDI… 1.00e-10 2.28e-8 NA -0.886 -2.64 100 <chr [68]>
## 4 REACTOME_PEPTIDE_CH… 1.00e-10 2.28e-8 NA -0.908 -2.61 79 <chr [68]>
## 5 REACTOME_INFLUENZA_… 1.00e-10 2.28e-8 NA -0.884 -2.61 95 <chr [69]>
## 6 KEGG_RIBOSOME 1.00e-10 2.28e-8 NA -0.897 -2.59 81 <chr [66]>
## 7 REACTOME_METABOLISM… 1.00e-10 2.28e-8 NA -0.774 -2.56 202 <chr [87]>
## 8 REACTOME_METABOLISM… 1.00e-10 2.28e-8 NA -0.755 -2.55 244 <chr [97]>
## 9 REACTOME_SRP_DEPEND… 1.00e-10 2.28e-8 NA -0.843 -2.53 103 <chr [67]>
## 10 REACTOME_TRANSLATION 1.00e-10 2.28e-8 NA -0.800 -2.52 141 <chr [71]>
## 11 REACTOME_FORMATION_… 1.00e-10 2.28e-8 NA -0.897 -2.40 48 <chr [28]>
## 12 BILANGES_SERUM_AND_… 1.00e-10 2.28e-8 NA -0.850 -2.38 66 <chr [40]>
## 13 REACTOME_ACTIVATION… 1.00e-10 2.28e-8 NA -0.864 -2.36 56 <chr [29]>
## 14 CHNG_MULTIPLE_MYELO… 1.00e-10 2.28e-8 NA -0.847 -2.31 55 <chr [29]>
## 15 HSIAO_HOUSEKEEPING_… 1.00e-10 2.28e-8 NA -0.605 -2.15 396 <chr [105]>
## 16 REACTOME_METABOLISM… 1.00e-10 2.28e-8 NA -0.558 -1.98 398 <chr [93]>
# 4.
fgseaResH <- fgsea(Mm.H, ranks, minSize=15, maxSize = 500)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaResH %>%
arrange(padj, desc(abs(NES))) %>%
top_n(10, -padj) %>%
as_tibble()
## # A tibble: 10 x 8
## pathway pval padj log2err ES NES size leadingEdge
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <list>
## 1 HALLMARK_MYC_TARG… 1.00e-10 5.00e-9 NA -0.683 -2.25 201 <chr [56]>
## 2 HALLMARK_OXIDATIV… 1.30e- 8 3.26e-7 0.748 0.648 2.03 196 <chr [128]>
## 3 HALLMARK_E2F_TARG… 5.32e- 7 8.87e-6 0.659 -0.562 -1.85 201 <chr [49]>
## 4 HALLMARK_TGF_BETA… 9.19e- 7 1.15e-5 0.659 -0.742 -2.04 54 <chr [15]>
## 5 HALLMARK_MYC_TARG… 1.84e- 5 1.84e-4 0.576 -0.701 -1.94 58 <chr [25]>
## 6 HALLMARK_ADIPOGEN… 2.63e- 5 2.19e-4 0.576 0.573 1.80 199 <chr [79]>
## 7 HALLMARK_CHOLESTE… 3.96e- 5 2.49e-4 0.557 0.701 1.97 73 <chr [29]>
## 8 HALLMARK_MTORC1_S… 3.99e- 5 2.49e-4 0.557 0.550 1.74 203 <chr [47]>
## 9 HALLMARK_G2M_CHEC… 6.05e- 5 3.36e-4 0.557 -0.506 -1.67 201 <chr [29]>
## 10 HALLMARK_FATTY_AC… 2.21e- 4 1.11e-3 0.519 0.576 1.77 155 <chr [54]>