# This file contains notes and instructions for adapting the step-by-step.txt workflow to paired-end data.
# Last version June 15, 2017.


####################################################################
####################################################################

# Paired-end quality filter.

# To run Trimmomatic in paired-end mode, first create a text file named "script.trimmo.PE.sh" containing the command to be executed. Make sure you are in the "WorkDir" folder and run:


# Write the per-sample Trimmomatic command to script.trimmo.PE.sh.
# Inside the script, $1 is the read 1 FASTQ file and $2 is the read 2 FASTQ file.
# $WD is expanded when the script runs (not here), so it must be exported in the calling shell.
# Both stdout and stderr go to $1.trim.nohup, so the run log is kept even when no terminal is attached.
echo 'nohup java -jar "$WD/tools/Trimmomatic-0.36/trimmomatic-0.36.jar" PE "$1" "$2" "$1.trim.fil.pair.gz" "$1.trim.fil.unpair.gz" "$2.trim.fil.pair.gz" "$2.trim.fil.unpair.gz" LEADING:20 TRAILING:20 AVGQUAL:25 SLIDINGWINDOW:10:30 MINLEN:36 > "$1.trim.nohup" 2>&1' > script.trimmo.PE.sh


# Next, list the read 1 and read 2 FASTQ files separately and run Trimmomatic on each pair. "-P2" sets the number of processes that xargs runs in parallel (2 in this case):

## These commands assume that your read 1 and read 2 files end in "_1.fastq.gz" and "_2.fastq.gz", respectively.

# One file name per line. The two lists are later joined line by line,
# so both must contain the mates in the same order.
ls -1 *_1.fastq.gz > files_1
ls -1 *_2.fastq.gz > files_2

# The following command generates 4 files per sample: for each of read 1 and read 2, the reads whose mate also passed the quality filter (paired) and the reads whose mate was discarded (unpaired).

# Join the two lists side by side and hand each read 1 / read 2 pair
# to the Trimmomatic script, running 2 jobs at a time.
paste files_1 files_2 | xargs -n 2 -P 2 sh script.trimmo.PE.sh

####################################################################
####################################################################

# Paired-end alignment.


# The following instructions create a simple text file named "script.align.PE.sh" containing the command for the paired-end read alignment. This assumes that your library preparation is unstranded (for other library preparations see https://ccb.jhu.edu/software/hisat2/manual.shtml):

# Write the per-sample HISAT2 command to script.align.PE.sh.
# Inside the script, $1 and $2 are the original read 1 / read 2 file names;
# the ".trim.fil.pair.gz" files derived from them are the ones that get aligned.
# $WD is expanded when the script runs (not here), so it must be exported in the calling shell.
# HISAT2 prints its alignment summary to stderr, so stderr is redirected into
# $1.align.stat.txt together with stdout; otherwise the summary can be lost.
echo 'nohup "$WD/tools/hisat2-2.0.5/hisat2" -x "$WD/TAIR10_Chr.all" -1 "$1.trim.fil.pair.gz" -2 "$2.trim.fil.pair.gz" -S "$1.sam" > "$1.align.stat.txt" 2>&1' > script.align.PE.sh


# Reuse the read 1 / read 2 lists: each pair of names is passed to the
# alignment script, running 2 jobs at a time.
paste files_1 files_2 | xargs -n 2 -P 2 sh script.align.PE.sh

####################################################################
####################################################################


# Paired-end gene assignment.

# The following command runs the program using 2 threads and stores the output in the "fc0" object. This assumes that your library preparation is unstranded (for other library preparations see https://bioconductor.org/packages/release/bioc/html/Rsubread.html):
 

# Assign the aligned reads in sam.list to the features of the Araport11 GTF
# annotation and keep the result in fc0.
# NOTE(review): sam.list is not defined in this file; presumably it is the
# vector of SAM file names built in step-by-step.txt -- confirm.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
fc0 <- featureCounts(
  sam.list,
  annot.ext = "Araport11_GFF3_genes_transposons.201606.gtf",
  isGTFAnnotationFile = TRUE,
  allowMultiOverlap = TRUE,
  isPairedEnd = TRUE, # input comes from paired-end libraries
  nthreads = 2,
  strandSpecific = 0 # 0 = unstranded library preparation
)