bash-script
cd /home/participant/Course_Materials/ChIPSeq/Materials/
#mkdir motifs if it doesn't exist
cd motifs
R-script
# read in the peak summit file
setwd("/home/participant/Course_Materials/ChIPSeq/Materials/motifs")
getwd()
peakfile <-read.table("/home/participant/Course_Materials/ChIPSeq/Materials/Results/Peaks/TAp73beta_r2.fastq_trimmed.fastq_sorted_standard_summits.bed")
head(peakfile)
# get a 400bp window around summit (-200 and +200)
Motif_Peaks <- data.frame((peakfile$V1), (peakfile$V2-200), (peakfile$V2+200), stringsAsFactors = FALSE)
head(Motif_Peaks)
# create bed file
options(scipen=999)
write.table(Motif_Peaks, file= "/home/participant/Course_Materials/ChIPSeq/Materials/motifs/Peaks_for_motif_detection.bed",
row.names=FALSE,col.names = FALSE, sep="\t", quote =FALSE)
# list files in directory and check if the file you created is there.
dir()
R-script
library("ChIPpeakAnno")
library("GenomicRanges")
#library("rtracklayer")
#bed <- import("/home/participant/Course_Materials/ChIPSeq/Materials/motifs/Peaks_for_motif_detection.bed")
#gr1 <- GRanges(seqnames=bed$V1, ranges=IRanges(start=bed$V2, end=bed$V3))
#read in the bed file
df1<-read.table("Peaks_for_motif_detection.bed", header=FALSE)
#convert the peaks to a GRanges object
gr1 <- GRanges(seqnames=df1$V1, ranges=IRanges(start=df1$V2, end=df1$V3))
##annotation
library("EnsDb.Hsapiens.v86")
## create annotation file from EnsDb (Ensembl) or TxDb (transcript annotation) packages
annoData <- toGRanges(EnsDb.Hsapiens.v86, feature="gene")
annoData[1:2]
# annotate the peak GRanges object with peaks mapped to gene with a -2000 and 500 bp window around the TSS
anno.gr1 <- annotatePeakInBatch(gr1,
AnnotationData=annoData,
output="nearestBiDirectionalPromoters",
bindingRegion=c(-2000, 500))
#trim out of bound ranges
anno.gr1 <- trim(anno.gr1)
#annotate with Entrez IDs
library("org.Hs.eg.db")
anno.gr1 <- addGeneIDs(anno.gr1,"org.Hs.eg.db",IDs2Add = "entrez_id")
# list annotated peaks
head(anno.gr1)
MEME-ChIP performs several motif analysis steps on a set of user provided DNA sequences in fasta format. It is especially useful for analyzing peaks from ChIP-seq experiments. MEME-ChIP can
bash-script
# If strand information is known use -s option as well
bedtools getfasta -fi "/home/participant/Course_Materials/Introduction/SS_DB/Reference/BWA/hg38_chr3.fa" \
-bed "/home/participant/Course_Materials/ChIPSeq/Materials/motifs/Peaks_for_motif_detection.bed" -fo "/home/participant/Course_Materials/ChIPSeq/Materials/motifs/Tp73_peaks.fa"
less Tp73_peaks.fa
# usefull sed commands
# find a line (3rd line in this case) and see its contents
# sed -n '3p' Tp73_peaks.fa
# delete a line (DON'T RUN unless there is a need to remove a line)
# sed -i".bak" '3d' filename
bash-script
#look at the options in meme-chip, meme and dreme
meme-chip --help
meme --help
dreme --help
Meme is an Expectation Maximisation (EM) algorithm and Dreme uses pattern matching to finds de novo motifs.
The fasta file should be 500bp for each peak, Meme looks at a 100bp window around the peak center for motifs. Repeat masking is recommended.
Motif database for meme analysis has been setup. If it’s NOT there, follow the instructions below:
Download meme motif PWM databases onto your working folder PWM databases
bash-script
Run only IF motif database is missing!
cd "/home/participant/Course_Materials/ChIPSeq/Materials/motifs"
wget "http://meme-suite.org/meme-software/Databases/motifs/motif_databases.12.18.tgz"
chmod 755 motif_databases.12.18.tgz
#unzip them
tar -zxvf motif_databases.12.18.tgz
bash-script
# If the meme directory is missing create it
# mkdir meme
meme-chip -oc /home/participant/Course_Materials/ChIPSeq/Materials/motifs/meme/ \
-db /home/participant/Course_Materials/ChIPSeq/Materials/motifs/motif_databases/EUKARYOTE/jolma2013.meme \
-db /home/participant/Course_Materials/ChIPSeq/Materials/motifs/motif_databases/JASPAR/JASPAR_CORE_2016_vertebrates.meme \
-meme-mod zoops -meme-minw 6 -order 1 \
-meme-maxw 30 -meme-nmotifs 10 -dreme-e 0.05 -centrimo-score 5.0 -centrimo-ethresh 10.0 /home/participant/Course_Materials/ChIPSeq/Materials/motifs/Tp73_peaks.fa
or upload fasta file meme-chip