-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathgenreads.sh
78 lines (75 loc) · 2.42 KB
/
genreads.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/bash
#
# Generates simulated read data sets based on read error profiles
#
. common.sh
# ART
# pIRS http://www.ncbi.nlm.nih.gov/pubmed/22508794
ART_DIR=$BASE_DIR/tools/art
PATH=$PATH:$ART_DIR/art_illumina_dir/src:$ART_DIR/Linux64
# Generate simulated reads for each reference VCF
for VCF in $(ls_reference_vcf) ; do
echo "Simulating reads using ART default profiles"
for READ_LENGTH in $READ_LENGTHS; do
case $READ_LENGTH in
36 | 44 | 50 | 75)
QUAL_FILE_1=$ART_DIR/art_illumina_dir/Illumina_GAII_profiles/EmpR${READ_LENGTH}R1.txt
QUAL_FILE_2=$ART_DIR/art_illumina_dir/Illumina_GAII_profiles/EmpR${READ_LENGTH}R2.txt
;;
100 )
QUAL_FILE_1=$ART_DIR/art_illumina_dir/Illumina_GAII_profiles/Emp${READ_LENGTH}R1.txt
QUAL_FILE_2=$ART_DIR/art_illumina_dir/Illumina_GAII_profiles/Emp${READ_LENGTH}R2.txt
;;
150 | 200 | 250 | * )
QUAL_FILE_1=$ART_DIR/art_illumina_dir/Illumina_GAII_profiles/EmpMiSeq250R1.txt
QUAL_FILE_2=$ART_DIR/art_illumina_dir/Illumina_GAII_profiles/EmpMiSeq250R2.txt
;;
esac
# workaround: are pindel & crest short read calls bad due to quality filtering?
QUAL_FILE_1=$ART_DIR/art_illumina_dir/Illumina_GAII_profiles/EmpMiSeq250R1.txt
QUAL_FILE_2=$ART_DIR/art_illumina_dir/Illumina_GAII_profiles/EmpMiSeq250R2.txt
if [ ! -f $QUAL_FILE_1 ] ; then
echo "Missing art base quality file $QUAL_FILE_1" 1>&2
continue
fi
if [ ! -f $QUAL_FILE_1 ] ; then
echo "Missing art base quality file $QUAL_FILE_1" 1>&2
continue
fi
for FRAG_SIZE in $FRAGMENT_SIZE ; do
for DEPTH in $READ_DEPTHS ; do
cx_load $VCF
CX_REFERENCE_VCF=$VCF
CX_REFERENCE_FA=${VCF%vcf}fa
CX_READ_LENGTH=$READ_LENGTH
CX_READ_FRAGMENT_LENGTH=$FRAG_SIZE
CX_READ_FRAGMENT_STDDEV=$(( CX_READ_FRAGMENT_LENGTH / 10))
CX_READ_DEPTH=$DEPTH
CX_READ_SIM="art"
CX_READ_SIM_QUAL_FILE=$QUAL_FILE_1
#CX_READ_SIM_SEED=12345
cx_save
XC_OUTPUT=$CX.1.fq
XC_SCRIPT="rm -rf $CX; mkdir $CX 2>/dev/null; cd $CX
art_illumina \
--paired \
--in $CX_REFERENCE_FA \
--out tmp. \
--noALN \
--len $READ_LENGTH \
--mflen $FRAG_SIZE \
--sdev $CX_READ_FRAGMENT_STDDEV \
--fcov $((CX_READ_DEPTH / 2)) \
--rndSeed 0 \
--qprof1 $QUAL_FILE_1 \
--qprof2 $QUAL_FILE_2 \
--id art &&
mv tmp.1.fq $CX.1.fq &&
mv tmp.2.fq $CX.2.fq &&
fastqc --threads 2 --nogroup -o $DATA_DIR -f fastq $CX.1.fq $CX.2.fq
"
xc_exec
done
done
done
done