-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDelter.py
90 lines (79 loc) · 3.71 KB
/
Delter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
configfile: "Delter.config.yaml"
vcf=config["Vcf"] #240225 updated
refseq=config["Refseq"] #240225 updated
outdir=config["Outdir"] #240225 updated
BAM=config["Bam"] #240225 updated
flowcell=config["Flowcell"] #240429 updated
strategy=config["Strategy"] #240429 updated
mrppthres=config["MRPPthres"] #240620 updated
homoQthres=config["HomoQthres"] #240620 updated
otherQthres=config["OtherQthres"] #240620 updated
ref_name=config["Ref"]
num=config["Num"]
tombodir=config["Tombo_dir"]
subsample=config["Subsample"]
print("Usage:")
print("snakemake -s Delter.py --cores 8 --config Ref=refname Num=5 Vcf=path/to/VCF Refseq=path/to/refseq Outdir=path/to/outputdir Bam=path/to/sorted/bam Tombo_dir=path/to/tombo_processed/fast5 Subsample=2000 Flowcell=R9 Strategy=Direct MRPPthres=0.001 HomoQthres=23 OtherQthres=20.6")
print("Ref=refname".ljust(40)+"The value of #CHROM in vcf file, e.g., 'Ref=chr1'")
print("Num=5".ljust(40)+"The number of bases up- and down-stream that are centered around the variation loci, default=5")
print("Vcf=path/to/VCF".ljust(40)+"The file path to vcf file, e.g., 'Vcf=/data/res/lofreq.vcf'")
print("Refseq=path/to/refseq".ljust(40)+"The file path to reference sequence, e.g., 'Refseq=/database/COVID-19.fa'")
print("Outdir=path/to/outputdir".ljust(40)+"The file path storing the output results and intermediate files, e.g., 'Outdir=/data/res'")
print("Bam=path/to/sorted/bam".ljust(40)+"The file path to sorted bam files, e.g., 'Bam=/data/res/sorted.bam'")
print("Tombo_dir=path/to/tombo_processed/fast5".ljust(40)+"The file path to tombo-resquiggled single fats5 files, e.g., 'Tombo_dir=/data/fast5'")
print("Subsample=2000".ljust(40)+"The number to subsample from reads covering variation loci, should be larger than 200, default=2000")
print("Flowcell=R9".ljust(40)+"The version of flow cell, should be R9 or R10, default=R9")
print("Strategy=Direct".ljust(40)+"The sequencing strategy, should be Amplicon or Direct, default=Direct")
print("MRPPthres=0.001".ljust(40)+"The threshold of MRPP A, default=0.001")
print("HomoQthres=23".ljust(40)+"The threshold of homo-dels, default=23")
print("OtherQthres=20.6".ljust(40)+"The threshold of other-dels, default=20.6")
rule all:
input:
str(outdir) + "/" + "run.log"
rule vcf2delinfo:
input:
str(refseq),
str(vcf)
output:
str(outdir) + "/" + "variant.info.txt",
str(outdir) + "/" + "targetpos.txt",
str(outdir) + "/" + "startpos.txt",
str(outdir) + "/" + "endpos.txt",
str(outdir) + "/" + "dellen.txt",
str(outdir) + "/" + "reglen.txt",
str(outdir) + "/" + "location.txt",
str(outdir) + "/" + "metric.txt"
params:
opts1=str(flowcell),
opts2=str(strategy)
benchmark:
str(outdir) + "/" + "benchmarks/vcf2delinfo.benchmark.txt"
script:
"scripts/vcf2delinfo-v2.sh"
rule delinfo2Signal_Qinfo:
input:
str(BAM),
str(outdir) + "/" + "targetpos.txt",
str(outdir) + "/" + "startpos.txt",
str(outdir) + "/" + "endpos.txt",
str(outdir) + "/" + "dellen.txt",
str(outdir) + "/" + "reglen.txt",
str(outdir) + "/" + "metric.txt"
output:
str(outdir) + "/" + "target.upstream_downstream.bases.comparison.result.txt",
str(outdir) + "/" + "fq.Qscore.info.txt",
str(outdir) + "/" + "run.log"
params:
opts1=str(ref_name),
opts2=str(num),
opts3=str(outdir),
opts4=str(tombodir),
opts5=str(subsample),
opts6=str(strategy),
opts7=str(mrppthres),
opts8=str(homoQthres),
opts9=str(otherQthres)
benchmark:
str(outdir) + "/" + "benchmarks/delinfo2Signal_Qinfo.benchmark.txt"
script:
"scripts/delinfo2Signal+Qinfo.sh"