Chapter 3 Alignment and feature counting
3.1 Introduction
We will use two sets of Bone Marrow Mononuclear Cells (BMMC):
- ‘CaronBourque2020’: pediatric samples
- ‘Hca’: HCA Census of Immune Cells for adult BMMCs
Fastq files were retrieved from publicly available archives (SRA and HCA).
Sequencing quality was assessed and visualised using fastQC and MultiQC.
Reads were aligned against GRCh38 and features counted using cellranger (v3.1.0).
# Set-up: directories used to locate inputs and to build report links.
# Former hard-coded working directory, left commented out so that this chunk
# does not change the working directory.
#wrkDir <- "/mnt/scratchb/bioinformatics/baller01/20200511_FernandesM_ME_crukBiSs2020/CaronBourque2020/grch38300"
#setwd(wrkDir)
# Project root directory, taken from `params` (presumably the R Markdown
# document parameters -- confirm in the document header).
projDir <- params$projDir
# Former hard-coded value of projDirLink, kept for reference:
#projDirLink <- "/Users/baller01/MyMount/svr008ssd/20200511_FernandesM_ME_crukBiSs2020"
# Copy of projDir in which every occurrence of "/ssd/personal/baller01" is
# replaced by "/Users/baller01/MyMount/svr008ssd"; the cellranger report links
# are built from this path rather than from projDir.
projDirLink <- gsub("/ssd/personal/baller01", "/Users/baller01/MyMount/svr008ssd", projDir)
inpDirBit <- params$inpDirBit # input directory fragment, e.g. "AnaWiSeurat/Attempt1"
outDirBit <- params$outDirBit # output directory fragment, e.g. "AnaWiSeurat/Attempt1"
# NOTE(review): presumably the directory name for QC plots; not used in this section.
plotDir <- "QcPlots"
# Example of how the link directory for one data set is derived from projDirLink:
# eg # cellrangerDirLink <- sprintf("%s/%s/grch38300", projDirLink, "CaronBourque2020")
3.2 10X cellranger pipeline in brief
Each sample was analysed separately with cellranger. This pipeline “is a set of analysis pipelines that process Chromium single-cell RNA-seq output to align reads, generate feature-barcode matrices and perform clustering and gene expression analysis.”
TODO Add code to call cellranger
3.3 Sample sheet
# Build one combined sample sheet (`sampleSheetCat`) with the columns 'Run',
# 'Sample.Name' and 'source_name' for both data sets.

# CaronBourque2020: comma-separated SRA run table with a header row.
cb_sampleSheetFn <- file.path(projDir, "Data/CaronBourque2020/SraRunTable.txt")
# Human Cell Atlas: accession list, read without a header row.
hca_sampleSheetFn <- file.path(projDir, "Data/Hca/accList_Hca.txt")

# Columns kept from each sheet when the two are combined.
splShtColToKeep <- c("Run", "Sample.Name", "source_name")

# Read the sample sheets in. TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned.
cb_sampleSheet <- read.table(cb_sampleSheetFn, header = TRUE, sep = ",")
hca_sampleSheet <- read.table(hca_sampleSheetFn, header = FALSE, sep = ",")

# Fail with an explicit message, rather than the generic
# "undefined columns selected", if the SRA run table lacks a required column.
if (!all(splShtColToKeep %in% colnames(cb_sampleSheet))) {
  stop(
    "Column(s) missing from ", cb_sampleSheetFn, ": ",
    paste(setdiff(splShtColToKeep, colnames(cb_sampleSheet)), collapse = ", "),
    call. = FALSE
  )
}

# The HCA list only provides sample names: reuse them as the 'Run' identifier
# and label every HCA sample as adult BMMC so both sheets share the same columns.
colnames(hca_sampleSheet) <- "Sample.Name"
hca_sampleSheet$Run <- hca_sampleSheet$Sample.Name
hca_sampleSheet$source_name <- "ABMMC" # adult BMMC

# Stack the two sheets, CaronBourque2020 rows first.
sampleSheetCat <- rbind(cb_sampleSheet[, splShtColToKeep], hca_sampleSheet[, splShtColToKeep])
3.4 10X cellranger reports for CaronBourque2020
# Print one markdown link per CaronBourque2020 sample, pointing at the
# cellranger web summary for that sample.
# Former hard-coded locations, kept for reference:
#cellrangerDir <- sprintf("%s/%s/grch38300", projDir, "CaronBourque2020")
#projDirOsx <- "/Users/baller01/MyMount/clust1b/20200511_FernandesM_ME_crukBiSs2020"

# Keep the CaronBourque2020 samples, i.e. every row of the combined sample
# sheet whose source_name is not "ABMMC".
sampleSheet <- sampleSheetCat %>%
  filter(source_name != "ABMMC")

# Make the report path for each sample of interest with the 'Run' column:
# <cellrangerDirLink>/<Run>/<Run>/outs/web_summary.html
cellrangerDirLink <- sprintf("%s/Data/%s/grch38300", projDirLink, "CaronBourque2020")
htmlVec <- sprintf("%s/%s/%s/outs/web_summary.html", cellrangerDirLink, sampleSheet$Run, sampleSheet$Run)
names(htmlVec) <- sampleSheet$Run

# seq_along() yields an empty sequence when no sample is left after filtering;
# 1:length() would iterate over c(1, 0) and print two "[ NA ]( NA )" links.
for (i in seq_along(htmlVec)) {
  cat("[", names(htmlVec)[i], "](", htmlVec[i], ")\n\n")
}
3.5 10X cellranger reports for HCA’s adult BMMCs
# Print one markdown link per HCA adult BMMC sample, pointing at the
# cellranger web summary for that sample.
# Former hard-coded locations, kept for reference (the first line still names
# the CaronBourque2020 data set):
#cellrangerDir <- sprintf("%s/%s/grch38300", projDir, "CaronBourque2020")
#projDirOsx <- "/Users/baller01/MyMount/clust1b/20200511_FernandesM_ME_crukBiSs2020"

# Keep the HCA samples, i.e. every row of the combined sample sheet whose
# source_name is "ABMMC".
sampleSheet <- sampleSheetCat %>%
  filter(source_name == "ABMMC")

# Make the report path for each sample of interest with the 'Run' column:
# <cellrangerDirLink>/<Run>/<Run>/outs/web_summary.html
cellrangerDirLink <- sprintf("%s/Data/%s/grch38300", projDirLink, "Hca")
htmlVec <- sprintf("%s/%s/%s/outs/web_summary.html", cellrangerDirLink, sampleSheet$Run, sampleSheet$Run)
names(htmlVec) <- sampleSheet$Run

# seq_along() yields an empty sequence when no sample is left after filtering;
# 1:length() would iterate over c(1, 0) and print two "[ NA ]( NA )" links.
for (i in seq_along(htmlVec)) {
  cat("[", names(htmlVec)[i], "](", htmlVec[i], ")\n\n")
}