forked from G100DKFZ/gene-is
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfigFile_targetedSequencing_pairedEnd.txt
115 lines (115 loc) · 4.5 KB
/
configFile_targetedSequencing_pairedEnd.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
########################################################################
####### Configuration File for Targeted Sequencing #######
####### (SureSelect/Agilent) Paired End Data Analysis #######
########################################################################
# Path to script directory (script directory is in main folder of GENE-IS_1.1-UGX)
scriptDir=$GENIS/scripts
#
# Path to the directory containing the libraries (lib directory is in main folder of GENE-IS_1.1-UGX)
libDir=$GENIS/lib
#
# Number of possible parallel alignments
threads=2
#
# Data analysis type is Targeted Sequencing (SureSelect/Agilent) (DO NOT CHANGE)
type=AGILENT
#
#
########################################################################
####### Input data files #######
########################################################################
# Path to both forward and reverse FASTQ files
forward=$GENIS/test/targetedSequencing/testData.TS.pair1.fastq.gz
reverse=$GENIS/test/targetedSequencing/testData.TS.pair2.fastq.gz
#
# Sample name PREFIX that will be used as prefix for final result files
# DO NOT include any spaces or symbols
sampleName=testDataTS
#
#
########################################################################
####### Quality filtration and adapter trimming parameters #######
########################################################################
# Quality filtration values (default = 20, integer values only)
qual=20
#
# Use the default Illumina adapters to trim from the raw forward and reverse FASTQ files, or provide your own
# Adapter to trim from forward file
adaptF=GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
#
# Adapter to trim from reverse file
adaptR=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
#
# Default intermediate output files name (DO NOT CHANGE)
suffOut=filtTrim
#
#
########################################################################
####### Reference fasta file and indexed files #######
########################################################################
# Path and prefix of the BWA-indexed files for the reference+vector
# concatenated genome and the vector reference genome alone in FASTA format
genomeVectorIndex=$GENIS/test/datasets/testGenomeVector.fa
genomeVector=$GENIS/test/datasets/testGenomeVector.fa
#
# Paths to the separate BWA-indexed FASTA files for the vector and for the reference genome, respectively
vectorIndexOut=$GENIS/test/datasets/VECTOR.fa
genomeIndexOut=$GENIS/test/datasets/testOnlyGenome.fa
#
# Path to the blat-indexed file for the reference+vector concatenated genome
genomeVectorIndexBlat=$GENIS/test/datasets/testGenomeVector.fa.2bit
#
# Specify the exact vector name that is mentioned in the reference/vector fasta sequence file
vectorString=VECTOR
#
# Minimum alignment identity percentage for re-alignment step with BLAT (default value 95)
minIden=95
#
# This is the value between primary alignment and secondary alignment for a sequence read (default value 0.9)
alScore=0.95
#
# For topographical clustering (i.e., clustering based on genomic IS position), the user can specify the clustering range (default value 10)
range=10
#
# This parameter regulates the sensitivity/specificity of soft clip extraction: 0=low, 1=medium, 2=high specificity
specificity=0
#
# The following parameter contains the alignment filename (DO NOT CHANGE)
alignmentOut=completAlignment
#
#
########################################################################
####### Third-party tools #######
########################################################################
# Path to BWA
aligner=$GENIS/tools/bin/bwa
#
# Path to secondary aligner (BLAT or pblat)
blatAligner=$GENIS/tools/bin/blat
#
# Path to skewer
skewer=$GENIS/tools/bin/skewer
#
# Path to samtools
samtools=$GENIS/tools/bin/samtools
#
# Path to bedtools
bedTools=$GENIS/tools/bin/bedtools
#
#
########################################################################
# Path of annotation files (complete refSeq table in .txt)
# See manual on how to extract them from the UCSC browser
UCSCAnnoFile=$GENIS/test/datasets/UCSC.anno.table_hg38.txt
#
#
########################################################################
# For approx IS extraction (TRUE/FALSE)
approxIS=FALSE
#
#
########################################################################
# For extra-stringent filtering of IS reads (TRUE/FALSE)
# (Recommended only in cases where the vector contains a transgene that is highly homologous to a reference genome region)
# (It can also cause loss of real IS)
extraFilt=FALSE