## dataset description version
## 1 mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) Mmur_3.0
## 2 mmusculus_gene_ensembl Mouse genes (GRCm38.p6) GRCm38.p6
That was just 1000 genes. We need annotations for the entire results table. Also, there may be some other interesting columns in BioMart that we wish to retrieve.
- Search the attributes and add the following to our list of attributes:
- The gene description
- The gene biotype
- Query BioMart using all of the genes in our results table (`resLvV`)
- How many Ensembl genes have multiple Entrez IDs associated with them?
# Filter on Ensembl gene IDs ...
ourFilterType <- "ensembl_gene_id"
# ... using every gene (row name) in the results table as a filter value
filterValues <- rownames(resLvV)
# Browse the available "attributes" (the things we can retrieve):
# first those whose name mentions "description" ...
listAttributes(ensembl) %>%
  filter(str_detect(name, "description"))
# ... then those whose name mentions "biotype"
listAttributes(ensembl) %>%
  filter(str_detect(name, "biotype"))
## name description page
## 1 description Gene description feature_page
## 2 phenotype_description Phenotype description feature_page
## 3 goslim_goa_description GOSlim GOA Description feature_page
## 4 mgi_description MGI description feature_page
## 5 entrezgene_description NCBI gene description feature_page
## 6 wikigene_description WikiGene description feature_page
## 7 family_description Ensembl Family Description feature_page
## 8 interpro_short_description Interpro Short Description feature_page
## 9 interpro_description Interpro Description feature_page
## 10 description Gene description structure
## 11 description Gene description homologs
## 12 description Gene description snp
## 13 source_description Variant source description snp
## 14 description Gene description sequences
## name description page
## 1 gene_biotype Gene type feature_page
## 2 transcript_biotype Transcript type feature_page
## 3 gene_biotype Gene type structure
## 4 gene_biotype Gene type sequences
## 5 transcript_biotype Transcript type sequences
# Attributes to retrieve: the Ensembl ID itself plus Entrez ID, gene symbol,
# gene description and gene biotype
attributeNames <- c(
  "ensembl_gene_id",
  "entrezgene_id",
  "external_gene_name",
  "description",
  "gene_biotype"
)
# Query BioMart for every gene in the results table
annot <- getBM(
  attributes = attributeNames,
  filters = ourFilterType,
  values = filterValues,
  mart = ensembl
)
# Number of Ensembl gene IDs that appear on more than one row of the
# annotation table
annot %>%
  count(ensembl_gene_id) %>%
  filter(n > 1) %>%
  nrow()
## [1] 97
Use the log2 fold change (`logFC`) on the x-axis, and use `-log10(FDR)` on the y-axis. (This `-log10` transformation is commonly used for p-values as it means that more significant genes have a higher value on the scale.)
Create a column of -log10(FDR) values
Create a plot with points coloured by whether FDR < 0.05
# Drop the genes that were filtered out (FDR is NA), then add a column
# holding the -log10 transformed FDR for the y-axis
filtTab <- shrinkLvV %>%
  filter(!is.na(FDR)) %>%
  mutate(`-log10(FDR)` = -log10(FDR))
# Volcano plot: effect size against significance, coloured by FDR < 0.05
ggplot(data = filtTab, mapping = aes(x = logFC, y = `-log10(FDR)`)) +
  geom_point(mapping = aes(colour = FDR < 0.05), size = 1)
Use the txMm to retrieve the exon coordinates for the genes:
ENSMUSG00000021604
ENSMUSG00000022146
ENSMUSG00000040118
# Ensembl gene IDs to look up in the txMm annotation object
keyList <- c(
  "ENSMUSG00000021604",
  "ENSMUSG00000022146",
  "ENSMUSG00000040118"
)
# Retrieve name, chromosome, start, end, strand and type for each record
# matching these gene IDs.
# NOTE(review): the requested columns are all transcript-level (TX*); if exon
# coordinates are what is wanted, the EXON* columns (e.g. EXONSTART, EXONEND)
# would presumably be needed instead -- confirm the intent.
AnnotationDbi::select(
  txMm,
  keys = keyList,
  keytype = "GENEID",
  columns = c("TXNAME", "TXCHROM", "TXSTART", "TXEND", "TXSTRAND", "TXTYPE")
)
## GENEID TXNAME TXTYPE TXCHROM TXSTRAND
## 1 ENSMUSG00000021604 ENSMUST00000176684 transcript 13 +
## 2 ENSMUSG00000021604 ENSMUST00000022095 transcript 13 +
## 3 ENSMUSG00000022146 ENSMUST00000022746 transcript 15 -
## 4 ENSMUSG00000022146 ENSMUST00000176826 transcript 15 -
## 5 ENSMUSG00000022146 ENSMUST00000176554 transcript 15 -
## 6 ENSMUSG00000022146 ENSMUST00000175862 transcript 15 -
## 7 ENSMUSG00000022146 ENSMUST00000177478 transcript 15 -
## 8 ENSMUSG00000022146 ENSMUST00000177263 transcript 15 -
## 9 ENSMUSG00000040118 ENSMUST00000167946 transcript 5 +
## 10 ENSMUSG00000040118 ENSMUST00000101581 transcript 5 +
## 11 ENSMUSG00000040118 ENSMUST00000039370 transcript 5 +
## 12 ENSMUSG00000040118 ENSMUST00000180204 transcript 5 +
## 13 ENSMUSG00000040118 ENSMUST00000199704 transcript 5 +
## 14 ENSMUSG00000040118 ENSMUST00000078272 transcript 5 +
## 15 ENSMUSG00000040118 ENSMUST00000115281 transcript 5 +
## 16 ENSMUSG00000040118 ENSMUST00000196750 transcript 5 +
## 17 ENSMUSG00000040118 ENSMUST00000200270 transcript 5 +
## 18 ENSMUSG00000040118 ENSMUST00000200158 transcript 5 +
## 19 ENSMUSG00000040118 ENSMUST00000200294 transcript 5 +
## 20 ENSMUSG00000040118 ENSMUST00000199236 transcript 5 +
## TXSTART TXEND
## 1 73260479 73269608
## 2 73260497 73269608
## 3 6813577 6874969
## 4 6815037 6874969
## 5 6820637 6824595
## 6 6836758 6874268
## 7 6843969 6874257
## 8 6854987 6874296
## 9 15934691 16374511
## 10 15934788 16371051
## 11 15934788 16371069
## 12 15934788 16371069
## 13 15934788 16371069
## 14 15934788 16374504
## 15 15934829 16370727
## 16 15934911 16089022
## 17 16025714 16268604
## 18 16325985 16329883
## 19 16326151 16341059
## 20 16361395 16362326