#qcPlotDirBit <- "NormPlots"
#setNameUpp <- "Caron"
# Project root directory. It uses the scratcha mount so that it agrees with
# the later projDir definition in this document, which is the path the Rds
# input is actually read from.
projDir <- "/mnt/scratcha/bioinformatics/baller01/20200511_FernandesM_ME_crukBiSs2020"
# Analysis output directory, relative to projDir.
outDirBit <- "AnaWiSce/Attempt1"
library(knitr)

1 Identifying confounding factors - Caron set

# Name of the sub-directory holding the plot files
# ("ConfoundPlots" is a possible alternative).
normPlotDirBit <- "NormPlots"
# Data set name in lower case, as used when building file names.
setName <- tolower("Caron")

caron

# Project root directory; the input and plot file paths are built from it.
projDir <- "/mnt/scratcha/bioinformatics/baller01/20200511_FernandesM_ME_crukBiSs2020"
# Analysis output directory, relative to projDir.
outDirBit <- "AnaWiSce/Attempt1"

Load object

# File-name suffix selecting which saved object to load.
setSuf <- "_5hCellPerSpl"
# Assemble the path of the saved object from the project directory, the
# output directory, the data set name and the suffix, then display it.
rdsPathFmt <- "%s/%s/Robjects/%s_sce_nz_postDeconv%s.Rds"
tmpFn <- sprintf(rdsPathFmt, projDir, outDirBit, setName, setSuf)
tmpFn
[1] "/mnt/scratcha/bioinformatics/baller01/20200511_FernandesM_ME_crukBiSs2020/AnaWiSce/Attempt1/Robjects/caron_sce_nz_postDeconv_5hCellPerSpl.Rds"
/mnt/scratcha/bioinformatics/baller01/20200511_FernandesM_ME_crukBiSs2020/AnaWiSce/Attempt1/Robjects/caron_sce_nz_postDeconv_5hCellPerSpl.Rds
# Load the saved object from disk and show its summary.
sce <- readRDS(file = tmpFn)
sce
class: SingleCellExperiment 
dim: 18372 5500 
metadata(0):
assays(2): counts logcounts
rownames(18372): ENSG00000238009 ENSG00000237491 ... ENSG00000275063
  ENSG00000271254
rowData names(11): ensembl_gene_id external_gene_name ... detected
  gene_sparsity
colnames: NULL
colData names(20): Sample Barcode ... cell_sparsity sizeFactor
reducedDimNames(0):
altExpNames(0):

Recall the PCA obtained with the scran-normalised counts:

Normalised counts are stored in the ‘logcounts’ assay.

# Label of the normalisation method; it is used in plot file names.
typeNorm <- "scran"
# Run a PCA on the expression values held in the "logcounts" assay.
scranPca <- runPCA(sce, exprs_values = "logcounts")

PCA plot for the ‘scran’ counts in the caron set.

# Path of the PCA plot image for this data set and normalisation method
# (the file is assumed to exist already).
pcaPngFmt <- "%s/%s/%s/%s_sce_nz_postQc%s_%sPca.png"
tmpFn <- sprintf(
  pcaPngFmt,
  projDir, outDirBit, normPlotDirBit, setName, setSuf, typeNorm
)
tmpFn
# Embed the image in the rendered document.
knitr::include_graphics(tmpFn, auto_pdf = TRUE)
#options(BiocSingularParam.default=IrlbaParam())
# Make ExactParam() the session-wide default BiocSingular algorithm.
options(BiocSingularParam.default = ExactParam())

# Cluster the cells first, then compute size factors using those clusters,
# and finally derive the log-normalised counts.
qclust <- quickCluster(
  sce,
  min.size = 30,
  use.ranks = FALSE
)
sce <- computeSumFactors(
  sce,
  sizes = 15,
  clusters = qclust
)
sce <- logNormCounts(sce)

Perform PCA:

# Compute the first 10 principal components from the "logcounts" assay and
# keep them in the "PCA" reduced-dimension slot of sce.
sce <- runPCA(sce, exprs_values = "logcounts", ncomponents = 10)

# PCA plot: colour by sample name, point size by the "sum" column and
# point shape by source name.
plotPCA(
  sce,
  colour_by = "Sample.Name",
  size_by = "sum",
  shape_by = "source_name"
)

# Store log2(raw counts + 1) in a separate "logcounts_raw" assay.
assay(sce, "logcounts_raw") <- log2(counts(sce) + 1)

# Cell-level covariates examined as potential confounders.
confoundVars <- c(
  "sum",
  "detected",
  "source_name",
  "Sample.Name",
  "subsets_Mito_percent"
)

# Correlation of the covariates with the principal components.
# References:
# on norm count https://biocellgen-public.svi.edu.au/mig_2019_scrnaseq-workshop/public/normalization-confounders-and-batch-correction.html#identifying-confounding-factors
# on logcounts_raw https://scrnaseq-course.cog.sanger.ac.uk/website/cleaning-the-expression-matrix.html#correlations-with-pcs
# Note: this call is a bit long to run.
explanPc <- getExplanatoryPCs(
  sce,
  exprs_values = "logcounts_raw",
  variables = confoundVars
)
plotExplanatoryPCs(explanPc / 100)

# Explanatory variables, computed on logcounts_raw
# (alternatives that were tried: "counts", "logcounts").
# https://biocellgen-public.svi.edu.au/mig_2019_scrnaseq-workshop/public/normalization-confounders-and-batch-correction.html#identifying-confounding-factors
plotExplanatoryVariables(
  sce,
  exprs_values = "logcounts_raw",
  variables = confoundVars
)

Correlation with PCs: logcounts (normalised):

# Correlation of the covariates with the principal components, using the
# default assay ("logcounts", i.e. the normalised values).
# References:
# on norm count https://biocellgen-public.svi.edu.au/mig_2019_scrnaseq-workshop/public/normalization-confounders-and-batch-correction.html#identifying-confounding-factors
# on logcounts_raw https://scrnaseq-course.cog.sanger.ac.uk/website/cleaning-the-expression-matrix.html#correlations-with-pcs
# Note: this call is a bit long to run.
confoundVars <- c(
  "sum",
  "detected",
  "source_name",
  "Sample.Name",
  "subsets_Mito_percent"
)
explanPc <- getExplanatoryPCs(
  sce,
  variables = confoundVars
)
plotExplanatoryPCs(explanPc / 100)

Explanatory variables: logcounts (normalised):

# Explanatory variables, using the default assay ("logcounts", i.e. the
# normalised values) since exprs_values is not set.
# https://biocellgen-public.svi.edu.au/mig_2019_scrnaseq-workshop/public/normalization-confounders-and-batch-correction.html#identifying-confounding-factors
confoundVars <- c(
  "sum",
  "detected",
  "source_name",
  "Sample.Name",
  "subsets_Mito_percent"
)
plotExplanatoryVariables(
  sce,
  variables = confoundVars
)
Warning: Removed 295 rows containing non-finite values (stat_density).

LS0tCnRpdGxlOiAiQ1JVSyBDSSBTdW1tZXIgU2Nob29sIDIwMjAgLSBpbnRyb2R1Y3Rpb24gdG8gc2luZ2xlLWNlbGwgUk5BLXNlcSBhbmFseXNpcyIKc3VidGl0bGU6ICdJZGVudGlmeW5nIGNvbmZvdW5kaW5nIGZhY3RvcnMnCgphdXRob3I6ICJTdGVwaGFuZSBCYWxsZXJlYXUiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgY29kZV9mb2xkaW5nOiBoaWRlCiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGNvZGVfZm9sZGluZzogaGlkZQogIGh0bWxfYm9vazoKICAgIGNvZGVfZm9sZGluZzogaGlkZQpwYXJhbXM6CiAgb3V0RGlyQml0OiAiQW5hV2lTY2UvQXR0ZW1wdDEiCi0tLQoKYGBge3IgdmFyaWFibGVzX25vcm19CiNxY1Bsb3REaXJCaXQgPC0gIk5vcm1QbG90cyIKI3NldE5hbWVVcHAgPC0gIkNhcm9uIgpwcm9qRGlyIDwtICIvbW50L3NjcmF0Y2hiL2Jpb2luZm9ybWF0aWNzL2JhbGxlcjAxLzIwMjAwNTExX0Zlcm5hbmRlc01fTUVfY3J1a0JpU3MyMDIwIgpvdXREaXJCaXQgPC0gIkFuYVdpU2NlL0F0dGVtcHQxIgpgYGAKCmBgYHtyfQpsaWJyYXJ5KGtuaXRyKQpgYGAKCmBgYHtyLCBpbmNsdWRlPUZBTFNFfQojc3JjIDwtIGxhcHBseShjKCJDYXJvbiIsICJIY2EiKSwgZnVuY3Rpb24oc2V0TmFtZVVwcCkga25pdF9leHBhbmQoZmlsZSA9ICJ0ZXN0LlJtZCIpKQpzcmMgPC0gbGFwcGx5KGMoIkNhcm9uIiksIGZ1bmN0aW9uKHNldE5hbWVVcHApIGtuaXRfZXhwYW5kKGZpbGUgPSAiY29uZm91bmRpbmcuUm1kIikpCmBgYAoKYHIga25pdCh0ZXh0ID0gdW5saXN0KHNyYykpYAoKCgo=