# configFile_targetedSequencing_pairedEnd.txt

########################################################################
#######        Configuration File for Targeted Sequencing        #######
#######      (SureSelect/Agilent) Paired End Data Analysis       #######
########################################################################
# NOTE(review): the paths in this file start with $GENIS - presumably a variable pointing at the
# GENE-IS main folder that the consumer of this file expands; confirm it is set before running
#
# Path to the script directory (the script directory is in the main folder of GENE-IS_1.1-UGX)
scriptDir=$GENIS/scripts
#
# Path to the directory containing the libraries (the lib directory is in the main folder of GENE-IS_1.1-UGX)
libDir=$GENIS/lib
#
# Number of alignments that may run in parallel
threads=2
#
# Data analysis type: Targeted Sequencing (SureSelect/Agilent) (DO NOT CHANGE)
type=AGILENT
#
#
########################################################################
#######                    Input data files                      #######
########################################################################
# Paths to the forward and the reverse FASTQ file of the paired-end run
# (the bundled test data files are gzip-compressed: .fastq.gz)
forward=$GENIS/test/targetedSequencing/testData.TS.pair1.fastq.gz
reverse=$GENIS/test/targetedSequencing/testData.TS.pair2.fastq.gz
#
# Sample name, used as the PREFIX of the final result files
# DO NOT include any spaces or symbols
sampleName=testDataTS
#
#
########################################################################
#######    Quality filtration and adapter trimming parameters    #######
########################################################################
# Quality filtration value (default = 20, integer values only)
qual=20
#
# Adapters to remove from the raw forward and reverse FASTQ files:
# keep the default Illumina adapters given below or provide your own
# Adapter to trim from the forward file
adaptF=GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
#
# Adapter to trim from the reverse file
adaptR=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
#
# Default name used for the intermediate output files (DO NOT CHANGE)
suffOut=filtTrim
#
#
########################################################################
#######         Reference fasta file and indexed files           #######
########################################################################
# Reference+vector concatenated genome:
#   genomeVectorIndex - path and prefix of the BWA-indexed files
#   genomeVector      - the genome in FASTA format
# NOTE(review): both entries point at the same testGenomeVector.fa here; confirm whether
# genomeVector is meant to be the concatenated genome or the vector genome alone
# Do not leave trailing spaces after a value - they may be read as part of the path
genomeVectorIndex=$GENIS/test/datasets/testGenomeVector.fa
genomeVector=$GENIS/test/datasets/testGenomeVector.fa
#
# Paths to the separate BWA-indexed FASTA files for the vector and for the reference genome, respectively
vectorIndexOut=$GENIS/test/datasets/VECTOR.fa
genomeIndexOut=$GENIS/test/datasets/testOnlyGenome.fa
#
# Path to the BLAT-indexed file (.2bit) for the reference+vector concatenated genome
genomeVectorIndexBlat=$GENIS/test/datasets/testGenomeVector.fa.2bit
#
# Exact vector name as it is written in the reference/vector FASTA sequence file
vectorString=VECTOR
#
# Minimum alignment identity percentage for the re-alignment step with BLAT (default value 95)
minIden=95
#
# Value between the primary alignment and the secondary alignment of a sequence read (default value 0.9)
# NOTE(review): this file sets 0.95 although the stated default is 0.9 - confirm which one is intended
alScore=0.95
#
# Range used for topographical clustering, i.e. clustering based on the genomic IS position (default value 10)
range=10
#
# Regulates the sensitivity/specificity of soft clip extraction: 0=low, 1=medium, 2=high specificity
specificity=0
#
# Name of the alignment output file (DO NOT CHANGE)
alignmentOut=completAlignment
#
#
########################################################################
#######                   Third-party tools                      #######
########################################################################
# Path to the BWA executable
aligner=$GENIS/tools/bin/bwa
#
# Path to the secondary aligner (BLAT or pblat)
blatAligner=$GENIS/tools/bin/blat
#
# Path to the skewer executable
skewer=$GENIS/tools/bin/skewer
#
# Path to the samtools executable
samtools=$GENIS/tools/bin/samtools
#
# Path to the bedtools executable
bedTools=$GENIS/tools/bin/bedtools
#
#
########################################################################
# Path to the annotation file (complete refSeq table in .txt format)
# See the manual on how to extract it from the UCSC browser
UCSCAnnoFile=$GENIS/test/datasets/UCSC.anno.table_hg38.txt
#
#
########################################################################
# Switch for approximate IS extraction (TRUE/FALSE)
approxIS=FALSE
#
#
########################################################################
# Switch for extra-stringent filtering of IS reads (TRUE/FALSE)
# Recommended only when the vector contains a transgene that is highly homologous to a region of the reference genome
# Caution: it can also cause the loss of real IS
extraFilt=FALSE