# This file contains notes and instructions to adapt the step-by-step.txt file
# to paired-end data.
# Last version June 15, 2017.

####################################################################
####################################################################
# Paired-end quality filter.

# To run Trimmomatic in paired-end (PE) mode, it is necessary to create a text
# file "script.trimmo.PE.sh" containing the instructions to be executed.
# The script takes two arguments: $1 = read 1 file, $2 = read 2 file.
# Because the command is single-quoted, $WD, $1 and $2 are written literally
# into the script and are only expanded when the script runs, so WD must be an
# exported variable in the shell that launches it
# (presumably set up as in step-by-step.txt -- confirm).
# "2>&1" sends Trimmomatic's messages (written to standard error) to the same
# $1.trim.nohup log file, even when the job is not attached to a terminal.
# Make sure you are in the "WorkDir" folder and run:
echo 'nohup java -jar $WD/tools/Trimmomatic-0.36/trimmomatic-0.36.jar PE $1 $2 $1.trim.fil.pair.gz $1.trim.fil.unpair.gz $2.trim.fil.pair.gz $2.trim.fil.unpair.gz LEADING:20 TRAILING:20 AVGQUAL:25 SLIDINGWINDOW:10:30 MINLEN:36 > $1.trim.nohup 2>&1' >script.trimmo.PE.sh

# Then we list the read 1 and read 2 FASTQ files separately and execute
# Trimmomatic on each pair. In the xargs call below, "-n2" passes two file
# names (read 1 and read 2) to each run of the script, and "-P2" sets the
# number of runs performed in parallel (2 in this case).
## This instruction assumes that your read 1 (_1) and read 2 (_2) files end in .fastq.gz.
ls *_1.fastq.gz >files_1
ls *_2.fastq.gz >files_2

# This instruction generates 4 files per pair: for each of read 1 and read 2,
# the reads that are still paired and those left unpaired after the quality filter.
paste files_1 files_2 | xargs -n2 -P2 sh script.trimmo.PE.sh

####################################################################
####################################################################
# Paired-end alignment.
# The following instructions (run in the shell) create a simple text file named
# "script.align.PE.sh" containing the instructions for the paired-end read
# alignment. This assumes that your library preparation is unstranded (for
# other library preparations see https://ccb.jhu.edu/software/hisat2/manual.shtml).
# The script takes two arguments: $1 = read 1 file name, $2 = read 2 file name;
# it aligns the paired, quality-filtered reads and writes the alignment to $1.sam.
# "2>&1" makes sure the HISAT2 alignment summary, which is printed to standard
# error, is always saved in $1.align.stat.txt.
echo 'nohup $WD/tools/hisat2-2.0.5/hisat2 -x $WD/TAIR10_Chr.all -1 $1.trim.fil.pair.gz -2 $2.trim.fil.pair.gz -S $1.sam > $1.align.stat.txt 2>&1' > script.align.PE.sh

# Run the alignment script once per read 1 / read 2 pair listed in the
# "files_1" and "files_2" files, 2 pairs in parallel (-P2).
paste files_1 files_2 | xargs -n2 -P2 sh script.align.PE.sh

####################################################################
####################################################################
# Paired-end gene assignment.

# The following command is run in R (featureCounts is part of the Rsubread
# package linked below). It runs the program using 2 threads and stores the
# output in the "fc0" object. This assumes that your library preparation is
# unstranded (for other library preparations see
# https://bioconductor.org/packages/release/bioc/html/Rsubread.html).
# NOTE(review): "sam.list" is not defined in these notes; presumably it is the
# vector of SAM file names built in step-by-step.txt -- confirm.
fc0 <- featureCounts(
  sam.list,
  annot.ext = "Araport11_GFF3_genes_transposons.201606.gtf",
  isGTFAnnotationFile = TRUE,
  allowMultiOverlap = TRUE,
  isPairedEnd = TRUE,
  nthreads = 2,
  strandSpecific = 0
)