Load the required packages and data for Day 11 if you have not already done so.
library(msigdbr)
library(clusterProfiler)
##
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## inner_join.phylo tidytree
## inner_join.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## clusterProfiler v4.8.3 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
##
## filter
library(pathview)
##
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
##
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
library(tidyverse)
## ── Attaching core tidyverse packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks clusterProfiler::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::simplify() masks clusterProfiler::simplify()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
shrink.d11 <- readRDS("RObjects/Shrunk_Results.d11.rds")
- Use
pathview
to export a figure for “mmu04659”or “mmu04658”, but this time only use genes that are statistically significant at FDR < 0.01
logFC <- shrink.d11 %>%
drop_na(padj, Entrez) %>%
filter(padj < 0.01) %>%
pull(log2FoldChange, Entrez)
pathview(gene.data = logFC,
pathway.id = "mmu04659",
species = "mmu",
limit = list(gene=5, cpd=1))
## Loading required namespace: org.Mm.eg.db
##
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /mnt/c/Users/hugot/Documents/BTF/course_materials/Bulk_RNAseq_Course_Base/course_files
## Info: Writing image file mmu04659.pathview.png
mmu04659.pathview.png:
mmu04659 - Th17 cell differentiation
Another common way to rank the genes is to order by pvalue, but also, sorting so that upregulated genes are at the start and downregulated at the end - you can do this combining the sign of the fold change and the pvalue.
First load the pathway details if you have not already done so.
library(msigdbr)
term2gene <- msigdbr(species = "Mus musculus", category = "H") %>%
dplyr::select(gs_name, ensembl_gene)
term2name <- msigdbr(species = "Mus musculus", category = "H") %>%
dplyr::select(gs_name, gs_description) %>%
distinct()
- Rank the genes by statistical significance - you will need to create a new ranking value using
-log10({p value}) * sign({Fold Change})
.
# rank genes
rankedGenes.e11 <- shrink.d11 %>%
drop_na(GeneID, pvalue, log2FoldChange) %>%
mutate(rank = -log10(pvalue) * sign(log2FoldChange)) %>%
arrange(desc(rank)) %>%
pull(rank, GeneID)
- Run GSEA using the new ranked genes and the Hallmark pathways.
# conduct analysis:
gseaRes.e11 <- GSEA(rankedGenes.e11,
TERM2GENE = term2gene,
TERM2NAME = term2name,
pvalueCutoff = 1.00,
minGSSize = 15,
maxGSSize = 500)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
View the results:
as_tibble(gseaRes.e11) %>%
arrange(desc(abs(NES))) %>%
top_n(10, wt=-p.adjust) %>%
dplyr::select(-core_enrichment) %>%
mutate(across(c("enrichmentScore", "NES"), round, digits=3)) %>%
mutate(across(c("pvalue", "p.adjust", "qvalue"), scales::scientific))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(c("enrichmentScore", "NES"), round, digits = 3)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
- Conduct the same analysis for the day 33 Infected vs Uninfected contrast.
# read d33 data in:
shrink.d33 <- readRDS("RObjects/Shrunk_Results.d33.rds")
# rank genes
rankedGenes.e33 <- shrink.d33 %>%
drop_na(GeneID, pvalue, log2FoldChange) %>%
mutate(rank = -log10(pvalue) * sign(log2FoldChange)) %>%
arrange(desc(rank)) %>%
pull(rank,GeneID)
# perform analysis
gseaRes.e33 <- GSEA(rankedGenes.e33,
TERM2GENE = term2gene,
TERM2NAME = term2name,
pvalueCutoff = 1.00,
minGSSize = 15,
maxGSSize = 500)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 3 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
View the results:
as_tibble(gseaRes.e33) %>%
arrange(desc(abs(NES))) %>%
top_n(10, wt=-p.adjust) %>%
dplyr::select(-core_enrichment) %>%
mutate(across(c("enrichmentScore", "NES"), round, digits=3)) %>%
mutate(across(c("pvalue", "p.adjust", "qvalue"), scales::scientific))