Chapter 3 Alignment and feature counting

3.1 Introduction

We will use two sets of Bone Marrow Mononuclear Cells (BMMC):

  • ‘CaronBourque2020’: pediatric samples
  • ‘Hca’: HCA Census of Immune Cells for adult BMMCs

FASTQ files were retrieved from publicly available archives (SRA and HCA).

Sequencing quality was assessed and visualised using FastQC and MultiQC.

Reads were aligned against GRCh38 and features counted using cellranger (v3.1.0).

# Earlier hard-coded locations, kept for reference only:
#   wrkDir <- "/mnt/scratchb/bioinformatics/baller01/20200511_FernandesM_ME_crukBiSs2020/CaronBourque2020/grch38300"
#   setwd(wrkDir)
#   projDirLink <- "/Users/baller01/MyMount/svr008ssd/20200511_FernandesM_ME_crukBiSs2020"

# Values taken from the document parameters.
projDir <- params$projDir
inpDirBit <- params$inpDirBit # e.g. "AnaWiSeurat/Attempt1"
outDirBit <- params$outDirBit # e.g. "AnaWiSeurat/Attempt1"

# Directory name for plots (presumably QC plots, going by the name).
plotDir <- "QcPlots"

# Project path with "/ssd/personal/baller01" replaced by
# "/Users/baller01/MyMount/svr008ssd"; this is the path used to build the
# links to the cellranger reports.
projDirLink <- gsub(
    pattern = "/ssd/personal/baller01",
    replacement = "/Users/baller01/MyMount/svr008ssd",
    x = projDir
)

# e.g. cellrangerDirLink <- sprintf("%s/%s/grch38300", projDirLink, "CaronBourque2020")

3.2 10X cellranger pipeline in brief

Each sample was analysed separately with cellranger. This pipeline “is a set of analysis pipelines that process Chromium single-cell RNA-seq output to align reads, generate feature-barcode matrices and perform clustering and gene expression analysis.”

TODO Add code to call cellranger

3.3 Sample sheet

# Build a single sample sheet covering both data sets.

# Location of each data set's sample sheet.
# CaronBourque2020: comma-separated run table with a header line.
cb_sampleSheetFn <- file.path(projDir, "Data/CaronBourque2020/SraRunTable.txt")
# Human Cell Atlas: list of sample names, read without a header line.
hca_sampleSheetFn <- file.path(projDir, "Data/Hca/accList_Hca.txt")

# Columns kept in the combined sample sheet.
splShtColToKeep <- c("Run", "Sample.Name", "source_name")

# Read sample sheets in.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned. stringsAsFactors = FALSE keeps text columns as character
# whatever the R version (the default was TRUE before R 4.0).
cb_sampleSheet <- read.table(cb_sampleSheetFn,
                             header = TRUE,
                             sep = ",",
                             stringsAsFactors = FALSE)
hca_sampleSheet <- read.table(hca_sampleSheetFn,
                              header = FALSE,
                              sep = ",",
                              stringsAsFactors = FALSE)

# Fail early, and with an explicit message, if the run table lacks a column
# needed below.
missingCols <- setdiff(splShtColToKeep, colnames(cb_sampleSheet))
if (length(missingCols) > 0) {
    stop("Column(s) missing from ", cb_sampleSheetFn, ": ",
         paste(missingCols, collapse = ", "),
         call. = FALSE)
}

# The HCA file only holds sample names: use them as run identifiers too and
# label every sample as adult BMMC.
colnames(hca_sampleSheet) <- "Sample.Name"
hca_sampleSheet$Run <- hca_sampleSheet$Sample.Name
hca_sampleSheet$source_name <- "ABMMC" # adult BMMC

# Stack the two sheets, restricted to the shared columns, and display the
# result as an interactive table showing 10 rows per page.
sampleSheetCat <- rbind(cb_sampleSheet[, splShtColToKeep],
                        hca_sampleSheet[, splShtColToKeep])
sampleSheetCat %>%
    DT::datatable(options = list(pageLength = 10))

3.4 10X cellranger reports for CaronBourque2020

# Print one markdown link per CaronBourque2020 sample, pointing to the
# cellranger web summary of that sample.

# Earlier alternatives, kept for reference only:
#   cellrangerDir <- sprintf("%s/%s/grch38300", projDir, "CaronBourque2020")
#   projDirOsx <- "/Users/baller01/MyMount/clust1b/20200511_FernandesM_ME_crukBiSs2020"

# Keep the samples of interest: every sample that is not an adult BMMC.
sampleSheet <- sampleSheetCat %>%
    filter(source_name != "ABMMC")

# Make the report path for each sample, using the 'Run' column both as
# directory name and as cellranger run name.
cellrangerDirLink <- sprintf("%s/Data/%s/grch38300", projDirLink, "CaronBourque2020")
htmlVec <- sprintf("%s/%s/%s/outs/web_summary.html",
                   cellrangerDirLink,
                   sampleSheet$Run,
                   sampleSheet$Run)
names(htmlVec) <- sampleSheet$Run

# seq_along() rather than 1:length(): with no matching sample, 1:0 would run
# the loop twice and print links made of NA and empty strings.
for (i in seq_along(htmlVec)) {
    cat("[", names(htmlVec)[i], "](", htmlVec[i], ")\n\n")
}

SRR9264343

SRR9264344

SRR9264345

SRR9264346

SRR9264347

SRR9264348

SRR9264349

SRR9264350

SRR9264351

SRR9264352

SRR9264353

SRR9264354

# TODO: add links to sample sheet and show with DT::datatable

3.5 10X cellranger reports for HCA’s adult BMMCs

# Print one markdown link per HCA adult BMMC sample, pointing to the
# cellranger web summary of that sample.

# Earlier alternative, kept for reference only:
#   projDirOsx <- "/Users/baller01/MyMount/clust1b/20200511_FernandesM_ME_crukBiSs2020"

# Keep the samples of interest: adult BMMCs only.
sampleSheet <- sampleSheetCat %>%
    filter(source_name == "ABMMC")

# Make the report path for each sample, using the 'Run' column both as
# directory name and as cellranger run name.
cellrangerDirLink <- sprintf("%s/Data/%s/grch38300", projDirLink, "Hca")
htmlVec <- sprintf("%s/%s/%s/outs/web_summary.html",
                   cellrangerDirLink,
                   sampleSheet$Run,
                   sampleSheet$Run)
names(htmlVec) <- sampleSheet$Run

# seq_along() rather than 1:length(): with no matching sample, 1:0 would run
# the loop twice and print links made of NA and empty strings.
for (i in seq_along(htmlVec)) {
    cat("[", names(htmlVec)[i], "](", htmlVec[i], ")\n\n")
}

MantonBM1

MantonBM2

MantonBM3

MantonBM4

MantonBM5

MantonBM6

MantonBM7

MantonBM8