Answers

1.0.1

# Make a scratch directory to practise in
mkdir test_folder
# List the FASTQ directory with human-readable sizes ('-h') to find the smallest file
ls -l -h FASTQ/
# Copy the smallest file into the new directory
cp FASTQ/SRR7889582.fastq.gz test_folder/
# Rename the directory - renaming is done with mv ("move")
mv test_folder to_delete
# Remove the directory and its contents: '-r' for "recursive", '-f' for "force"
rm -r -f to_delete

1.2.1

# Open the manual page for 'head' (press 'q' to exit)
man head

# head - print the top 'n' lines of a file
# grep - search for lines containing a particular pattern of characters
# wc   - "word count"; with '-l' it counts lines only
head -n 100 dm6.Ensembl.genes.gtf \
  | grep CDS \
  | wc -l
>35

2.1.1

# Select the lines that start with '>' and count them with wc -l
grep '^>' Genome/dm6.fa | wc -l
>1870

2.1.2

# With grep, '-v' returns the 'inverse', i.e. lines NOT containing the pattern,
# so the lines starting with '>' are dropped and only the other lines remain.
# tr -d '\n' deletes the newline at the end of every line; without it wc would
# count one extra character per line of the file.
# '-m' with wc counts characters
grep -v '^>' Genome/dm6.fa | tr -d '\n' | wc -m
>143726002
# (146601461 is the figure you get if the newline characters are counted as well)

2.2.1

# File size stands in for read count here: the smallest file has the fewest reads, the largest the most.
# 'ls -l' lists the file sizes; adding '-S' sorts the listing by size
ls -l -S FASTQ

# Each read entry is counted via its first line, which is matched with the text "@SRR7889".
# zcat reads the zipped (.gz) FASTQ file and pipes it on to grep;
# grep '-c' prints the number of matching lines instead of the lines themselves
zcat FASTQ/SRR7889582.fastq.gz | grep -c '@SRR7889'
>1505731
zcat FASTQ/SRR7889581.fastq.gz | grep -c '@SRR7889'
>18381890

# To get the read length, take the first read sequence (the second line of the file) and count its characters.
# 'head -n 2' keeps the first two lines; 'tail' is the opposite of 'head', so 'tail -n 1' keeps the last of those.
# tr -d '\n' removes the newline at the end of the line so that wc -m counts only
# the sequence characters (without it the answer is one too high).
zcat FASTQ/SRR7889581.fastq.gz | head -n 2 | tail -n 1 | tr -d '\n' | wc -m

2.3.1 

# Done on the UCSC table browser

2.3.2

# "sort" reorders a file. The default is alphabetically by line.
#   '-k 10'  sort on the 10th column
#   '-n'     sort numerically
#   '-r'     sort in reverse (high to low)
# 'head -n 5' then keeps only the first five lines of the sorted output
sort -n -r -k 10 dm6.Ensembl.genes.bed12 \
  | head -n 5

2.3.3

# N.B. The question should have said "CDS" rather than "gene" - sorry

# 'cut -f 3' cuts out the third column (by default cut assumes columns are separated by tabs)
cut -f 3 dm6.Ensembl.genes.gtf \
  | grep CDS \
  | wc -l
>17737

# An alternative using the 'awk' program: print the lines whose third field equals "CDS"
awk '$3 == "CDS" { print }' dm6.Ensembl.genes.gtf \
  | wc -l
>17737

3.1.1

# Create directory
mkdir QC
# Run FastQC on one sample: write the report into QC/ and use 7 threads
fastqc --outdir QC --threads 7 FASTQ/SRR7889585.fastq.gz

4.1.1

# Create the directory for the index ('-p' also creates missing parent directories)
mkdir -p Genome/Index
# Build the HISAT2 index from the genome FASTA with 7 threads ('-p 7');
# the last argument, Genome/Index/dm6, is the prefix used for the index file names
hisat2-build -p 7 Genome/dm6.fa Genome/Index/dm6

# This should create 8 dm6.X.ht2 files

4.2.1 

# The alignments are redirected into hisat2/, so make sure that directory exists first
# ('-p' means no error if it is already there)
mkdir -p hisat2
# Align the reads given with '-U' against the index given with '-x', using 7 threads ('-p 7');
# the SAM output goes to standard output, which is redirected into a file
hisat2 -x Genome/Index/dm6 -U FASTQ/SRR7889582.fastq.gz -p 7 > hisat2/SRR7889582.sam

4.3.1

# Convert the SAM file to BAM: '-b' selects BAM output.
# Do not add '-H' here - it outputs the header ONLY, which would leave a BAM with no reads in it.
samtools view -b hisat2/SRR7889582.sam > hisat2/SRR7889582.bam

4.4.1

# Sort the BAM file, writing the sorted result to the file named with '-o'
samtools sort -o hisat2/SRR7889582.sorted.bam hisat2/SRR7889582.bam
# Build an index for the sorted BAM file
samtools index hisat2/SRR7889582.sorted.bam

4.5.1

# Count the reads in the sorted BAM against the gene annotation:
#   -a  the annotation file (GTF)
#   -o  the file the counts table is written to
featureCounts \
  -a Gene_annotation/dm6.Ensembl.genes.gtf \
  -o hisat2/SRR7889582.featuresCounts \
  hisat2/SRR7889582.sorted.bam

4.5.2

# Sort on the 7th column, numerically and in reverse (highest first), then keep the top line
sort -n -r -k 7 hisat2/SRR7889582.featuresCounts \
  | head -n 1
> FBtr0081639	chr3R;chr3R	7086599;7087234	7086744;7088839	+;+	1752	4530