Challenge 1

Another common way to rank the genes is to order by pvalue, but also, sorting so that upregulated genes are at start and downregulated at the other - you can do this combining the sign of the fold change and the pvalue.

  1. Rank the genes by statisical significance - you will need to create a new ranking value using -log10({p value}) * sign({Fold Change})
  2. Load the “C2” pathways from the the data/mouse_c2_v5.RData file
  3. Run fgsea using the new ranked genes and the C2 pathways
  4. Run fgsea using the new ranked genes and the H pathways. How do these results differ from the ones we got when ranking by the fold change alone?
ranks <- -log10(gseaDat$pvalue) * sign(gseaDat$logFC) # or gseaDat$stat
names(ranks) <- gseaDat$Entrez  

load("../Robjects/mouse_c2_v5.RData")
pathwaysC2 <- Mm.c2

fgseaResC2 <- fgsea(pathwaysC2, ranks, minSize=15, maxSize = 500, nperm=1000)
## Warning in fgsea(pathwaysC2, ranks, minSize = 15, maxSize = 500, nperm = 1000):
## You are trying to run fgseaSimple. It is recommended to use fgseaMultilevel. To
## run fgseaMultilevel, you need to remove the nperm argument in the fgsea function
## call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResC2[order(padj, -abs(NES)), ], n=10)
##                                                                    pathway
##  1:                                          REACTOME_INFLUENZA_LIFE_CYCLE
##  2: REACTOME_NONSENSE_MEDIATED_DECAY_ENHANCED_BY_THE_EXON_JUNCTION_COMPLEX
##  3:                       REACTOME_3_UTR_MEDIATED_TRANSLATIONAL_REGULATION
##  4:             REACTOME_INFLUENZA_VIRAL_RNA_TRANSCRIPTION_AND_REPLICATION
##  5:                                      REACTOME_PEPTIDE_CHAIN_ELONGATION
##  6:                                                          KEGG_RIBOSOME
##  7:                                            REACTOME_METABOLISM_OF_MRNA
##  8:                                             REACTOME_METABOLISM_OF_RNA
##  9:   REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
## 10:                                                   REACTOME_TRANSLATION
##            pval      padj         ES       NES nMoreExtreme size
##  1: 0.002277904 0.0508366 -0.8585177 -2.662652            0  128
##  2: 0.002309469 0.0508366 -0.8873458 -2.643421            0  100
##  3: 0.002309469 0.0508366 -0.8855713 -2.638135            0  100
##  4: 0.002257336 0.0508366 -0.8842923 -2.623054            0   95
##  5: 0.002304147 0.0508366 -0.9083812 -2.619290            0   79
##  6: 0.002320186 0.0508366 -0.8967344 -2.600916            0   81
##  7: 0.002386635 0.0508366 -0.7740333 -2.522087            0  202
##  8: 0.002444988 0.0508366 -0.7546201 -2.515622            0  244
##  9: 0.002336449 0.0508366 -0.8426403 -2.514598            0  103
## 10: 0.002320186 0.0508366 -0.8000658 -2.487740            0  141
##                                  leadingEdge
##  1: 20085,19982,269261,20084,20103,65019,...
##  2: 20085,19982,269261,20084,20103,74133,...
##  3: 20085,19982,269261,20084,20103,65019,...
##  4: 20085,19982,269261,20084,20103,65019,...
##  5: 20085,19982,269261,20084,20103,65019,...
##  6: 20085,19982,269261,20084,20103,65019,...
##  7: 20085,19982,269261,20084,20103,74133,...
##  8: 20085,19982,269261,20084,20103,74133,...
##  9: 20085,19982,269261,20084,20103,65019,...
## 10: 20085,19982,269261,20084,20103,65019,...
fgseaResH <- fgsea(pathwaysH, ranks, minSize=15, maxSize = 500, nperm=1000)
## Warning in fgsea(pathwaysH, ranks, minSize = 15, maxSize = 500, nperm = 1000): You are trying to run fgseaSimple. It is recommended to use fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument in the fgsea function call.

## Warning in fgsea(pathwaysH, ranks, minSize = 15, maxSize = 500, nperm = 1000): There are ties in the preranked stats (0.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
head(fgseaResH[order(padj, -abs(NES)), ], n=20)
##                                 pathway        pval       padj         ES
##  1:             HALLMARK_MYC_TARGETS_V1 0.002277904 0.01052189 -0.6826742
##  2:  HALLMARK_OXIDATIVE_PHOSPHORYLATION 0.001748252 0.01052189  0.6475885
##  3:         HALLMARK_TGF_BETA_SIGNALING 0.002114165 0.01052189 -0.7419824
##  4:    HALLMARK_CHOLESTEROL_HOMEOSTASIS 0.001821494 0.01052189  0.7008853
##  5:             HALLMARK_MYC_TARGETS_V2 0.002066116 0.01052189 -0.7011704
##  6:                HALLMARK_E2F_TARGETS 0.002277904 0.01052189 -0.5619850
##  7:               HALLMARK_ADIPOGENESIS 0.001745201 0.01052189  0.5730449
##  8:           HALLMARK_MTORC1_SIGNALING 0.001751313 0.01052189  0.5499793
##  9:                 HALLMARK_PEROXISOME 0.001845018 0.01052189  0.6087801
## 10:             HALLMARK_G2M_CHECKPOINT 0.002277904 0.01052189 -0.5058712
## 11:    HALLMARK_ESTROGEN_RESPONSE_EARLY 0.002314815 0.01052189 -0.4818514
## 12:      HALLMARK_FATTY_ACID_METABOLISM 0.003663004 0.01526252  0.5762348
## 13:       HALLMARK_BILE_ACID_METABOLISM 0.005504587 0.02117149  0.5475702
## 14:        HALLMARK_ALLOGRAFT_REJECTION 0.006849315 0.02446184 -0.4604850
## 15: HALLMARK_WNT_BETA_CATENIN_SIGNALING 0.008421053 0.02631579 -0.6575068
## 16:            HALLMARK_NOTCH_SIGNALING 0.008179959 0.02631579 -0.6956626
## 17:                 HALLMARK_DNA_REPAIR 0.010989011 0.03232062 -0.4592440
## 18:    HALLMARK_TNFA_SIGNALING_VIA_NFKB 0.011655012 0.03237503 -0.4490023
## 19:     HALLMARK_ESTROGEN_RESPONSE_LATE 0.021176471 0.05572755 -0.4157999
## 20:          HALLMARK_PROTEIN_SECRETION 0.025878004 0.06469501  0.4990399
##           NES nMoreExtreme size                                 leadingEdge
##  1: -2.264405            0  201     23918,20103,12567,27050,20104,14694,...
##  2:  2.066799            0  196   108664,73834,20930,67863,12039,109754,...
##  3: -2.009290            0   54     15901,17131,15903,16997,13805,20482,...
##  4:  1.947338            0   73 170459,16835,208715,63913,319554,235293,...
##  5: -1.943605            0   58   12567,18148,53608,13537,101612,235036,...
##  6: -1.864084            0  201    12704,15374,12567,218210,16201,20492,...
##  7:  1.832673            0  199    12683,227095,13476,18405,72157,12039,...
##  8:  1.763742            0  203  12450,170459,16835,208715,63913,319554,...
##  9:  1.760775            0   95    16413,19299,319554,12613,14712,66887,...
## 10: -1.677956            0  201    15374,17975,12567,16201,105727,27041,...
## 11: -1.584557            0  193     11839,22403,27205,98952,16763,12614,...
## 12:  1.764785            1  155  12683,208715,227095,319554,12613,18194,...
## 13:  1.594234            2   98    19299,319554,14712,66887,18642,14711,...
## 14: -1.525955            2  202     20085,16156,20005,18148,76846,20091,...
## 15: -1.693331            3   40     18128,22419,15213,12006,20602,21414,...
## 16: -1.693149            3   31    18128,109689,74198,15205,21402,56198,...
## 17: -1.451626            4  150 23918,11636,20016,100042069,21681,69241,...
## 18: -1.481846            4  199     11839,12156,16878,18124,13537,14528,...
## 19: -1.369648            8  197     14184,11839,22403,98952,14620,69540,...
## 20:  1.445366           13   96    108664,22319,56382,99371,69938,12176,...

Challenge 2

  1. Create a gene list for genes that are up-regulated by at least 4x (logFC>2) in lactating mice
  2. Run a goseq analysis on this gene list
  3. Plot the results
  4. How is this result different to the previous GO analysis?

Look at genes that are up regulated by at least 4x (logFC>2) in lactating mices

isSigGeneUp <- shrinkLvV$FDR < 0.01 &
    !is.na(shrinkLvV$FDR) &
    shrinkLvV$logFC > 2

genesUp <- as.integer(isSigGeneUp)
names(genesUp) <- shrinkLvV$GeneID

pwf <- nullp(genesUp, "mm10", "ensGene", bias.data = shrinkLvV$medianTxLength)
## Warning in pcls(G): initial point very close to some inequality constraints

goResultsUp <- goseq(pwf, "mm10","ensGene", test.cats=c("GO:BP"))

goResultsUp %>%
    top_n(10, wt=-over_represented_pvalue) %>%
    mutate(hitsPerc=numDEInCat*100/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=term, 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        expand_limits(x=0) +
        labs(x="Hits (%)", y="GO term", colour="p value", size="Count")

Challenge 3

  1. Use pathview to export a figure for “mmu04060”, but this time only use genes that are statistically significant at FDR < 0.01
sigGenes <- shrinkLvV$FDR < 0.01 & !is.na(shrinkLvV$FDR)

logFC <- shrinkLvV$logFC[sigGenes]
names(logFC) <- shrinkLvV$Entrez[sigGenes]

pathview(gene.data = logFC, 
         pathway.id = "mmu04060", 
         species = "mmu", 
         limit = list(gene=5, cpd=1))
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /home/ubuntu/Course_Materials/solutions
## Info: Writing image file mmu04060.pathview.png

mmu04060.pathview.png:

mmu04060 -Cytokine-cytokine receptor interaction

mmu04060 -Cytokine-cytokine receptor interaction