Chapter 3 Alignment and feature counting

3.1 Introduction

We will use two sets of Bone Marrow Mononuclear Cells (BMMC):

‘CaronBourque2020’: pediatric samples
‘Hca’: HCA Census of Immune Cells for adult BMMCs

FASTQ files were retrieved from publicly available archives (SRA and HCA).

Sequencing quality was assessed and visualised using FastQC and MultiQC.

Reads were aligned against GRCh38 and features counted using cellranger (v3.1.0).

# Chunk set-up: directories used throughout this chapter.
# (Previously the working directory was set explicitly; kept for reference.)
#wrkDir <- "/mnt/scratchb/bioinformatics/baller01/20200511_FernandesM_ME_crukBiSs2020/CaronBourque2020/grch38300"
#setwd(wrkDir)

# Project root, taken from the document parameters.
projDir <- params$projDir

# Same project root with the server-side prefix swapped for the
# "/Users/baller01/MyMount/svr008ssd" prefix, used below to build report links
# (presumably a local mount of the same storage -- TODO confirm), e.g.
# "/Users/baller01/MyMount/svr008ssd/20200511_FernandesM_ME_crukBiSs2020".
projDirLink <- gsub(
    pattern = "/ssd/personal/baller01",
    replacement = "/Users/baller01/MyMount/svr008ssd",
    x = projDir
)

# Input and output sub-directories, also taken from the document parameters,
# e.g. "AnaWiSeurat/Attempt1" for both.
inpDirBit <- params$inpDirBit
outDirBit <- params$outDirBit

# Name of the directory for QC plots.
plotDir <- "QcPlots"

# Example of a link directory built from projDirLink:
# cellrangerDirLink <- sprintf("%s/%s/grch38300", projDirLink, "CaronBourque2020")

3.2 10X cellranger pipeline in brief

Each sample was analysed separately with cellranger. This pipeline “is a set of analysis pipelines that process Chromium single-cell RNA-seq output to align reads, generate feature-barcode matrices and perform clustering and gene expression analysis.”

TODO Add code to call cellranger

3.3 Sample sheet

# Build one combined sample sheet (sampleSheetCat) covering both data sets and
# display it as an interactive table.

# Sample sheet file for CaronBourque2020
cb_sampleSheetFn <- file.path(projDir, "Data/CaronBourque2020/SraRunTable.txt")
# Sample sheet file for the Human Cell Atlas
hca_sampleSheetFn <- file.path(projDir, "Data/Hca/accList_Hca.txt")

# Columns kept from each sample sheet so that the two can be row-bound.
splShtColToKeep <- c("Run", "Sample.Name", "source_name")

# Read sample sheets in.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
cb_sampleSheet <- read.table(cb_sampleSheetFn, header = TRUE, sep = ",")
hca_sampleSheet <- read.table(hca_sampleSheetFn, header = FALSE, sep = ",")

# Fail early if the CaronBourque2020 sheet lacks a column we subset on below.
stopifnot(all(splShtColToKeep %in% colnames(cb_sampleSheet)))

# The HCA file is read without a header; its column is named 'Sample.Name'
# and the same values are reused as the 'Run' identifier.
# NOTE(review): this assumes the file holds a single column -- confirm.
colnames(hca_sampleSheet) <- "Sample.Name"
hca_sampleSheet$Run <- hca_sampleSheet$Sample.Name
hca_sampleSheet$source_name <- "ABMMC" # adult BMMC

# Stack the two sheets, keeping only the shared columns.
sampleSheetCat <- rbind(
    cb_sampleSheet[, splShtColToKeep],
    hca_sampleSheet[, splShtColToKeep]
)

# Show the combined sheet, 10 rows per page.
sampleSheetCat %>%
    #DT::datatable(options = list(dom='t'))
    DT::datatable(options = list(pageLength = 10))

3.4 10X cellranger reports for CaronBourque2020

# Print one Markdown link per CaronBourque2020 sample, pointing at that
# sample's cellranger web summary.
# NOTE(review): the links are written with cat(), so the chunk presumably
# needs results='asis' to render them as links -- confirm in the chunk header.

# Keep every sample that is not labelled as adult BMMC ("ABMMC").
sampleSheet <- sampleSheetCat %>%
    filter(source_name != "ABMMC")

# Make the report path for each sample of interest, using the 'Run' column
# both as the sample directory and as the cellranger output directory name.
cellrangerDirLink <- sprintf("%s/Data/%s/grch38300", projDirLink, "CaronBourque2020")
htmlVec <- sprintf(
    "%s/%s/%s/outs/web_summary.html",
    cellrangerDirLink, sampleSheet$Run, sampleSheet$Run
)
names(htmlVec) <- sampleSheet$Run

# seq_along() rather than 1:length(): with no matching samples, 1:0 would
# iterate over c(1, 0) and print a bogus "[ NA ]( NA )" link.
for (i in seq_along(htmlVec)) {
    cat("[", names(htmlVec)[i], "](", htmlVec[i], ")\n\n")
}

# TODO: add links to sample sheet and show with DT::datatable

3.5 10X cellranger reports for HCA’s adult BMMCs

# Print one Markdown link per HCA adult BMMC sample, pointing at that sample's
# cellranger web summary.
# NOTE(review): the links are written with cat(), so the chunk presumably
# needs results='asis' to render them as links -- confirm in the chunk header.

# Keep only the samples labelled as adult BMMC ("ABMMC").
sampleSheet <- sampleSheetCat %>%
    filter(source_name == "ABMMC")

# Make the report path for each sample of interest, using the 'Run' column
# both as the sample directory and as the cellranger output directory name.
cellrangerDirLink <- sprintf("%s/Data/%s/grch38300", projDirLink, "Hca")
htmlVec <- sprintf(
    "%s/%s/%s/outs/web_summary.html",
    cellrangerDirLink, sampleSheet$Run, sampleSheet$Run
)
names(htmlVec) <- sampleSheet$Run

# seq_along() rather than 1:length(): with no matching samples, 1:0 would
# iterate over c(1, 0) and print a bogus "[ NA ]( NA )" link.
for (i in seq_along(htmlVec)) {
    cat("[", names(htmlVec)[i], "](", htmlVec[i], ")\n\n")
}