common-workflow-library · mr-c · Dec 12, 2015 · Dec 12, 2015 · Dec 12, 2015 · Dec 12, 2015
diff --git a/tools/hall-lab-svtools-vcftobedpe.cwl b/tools/hall-lab-svtools-vcftobedpe.cwl
@@ -0,0 +1,37 @@
+#!/usr/bin/env cwl-runner
+
+# CWL wrapper for svtools `vcftobedpe`: passes a VCF file via `-i` and
+# captures the tool's standard output into `output.bedpe`.
+
+cwlVersion: "cwl:draft-3.dev2"
+
+class: CommandLineTool
+
+description: |
+  Usage: vcftobedpe -i <in.vcf> -o [out.bedpe]
+
+requirements:
+  - "@import": envvar-global.cwl
+
+inputs:
+  - id: "#input"
+    type: File
+    description: "Input vcf file."
+    streamable: true
+    inputBinding:
+      prefix: "-i"
+
+# No `-o` is passed on the command line; this assumes the tool prints the
+# BEDPE on stdout, which is redirected here and globbed by `#bedpe` below.
+stdout:
+  "output.bedpe"
+
+outputs:
+  - id: "#bedpe"
+    type: File
+    description: "The bedpe file"
+    streamable: true
+    outputBinding:
+      glob: "output.bedpe"
+
+baseCommand: ["vcftobedpe"]
diff --git a/tools/jobs/vawk-job.json b/tools/jobs/vawk-job.json
@@ -0,0 +1,7 @@
+{
+  "cmd": "{ print $1 }",
+  "input": {
+    "path": "../test-files/APGI2049_Tumor-manta.vcf",
+    "class": "File"
+  }
+}
diff --git a/tools/vawk.cwl b/tools/vawk.cwl
@@ -0,0 +1,71 @@
+#!/usr/bin/env cwl-runner
+
+# CWL wrapper for `vawk`: runs an awk-style program (`cmd`) over a VCF file
+# and captures whatever vawk prints on stdout into `output.vcf`.
+
+cwlVersion: "cwl:draft-3.dev2"
+
+class: CommandLineTool
+
+requirements:
+  - "@import": envvar-global.cwl
+
+description: |
+  usage: vawk [-h] [-v VAR] [-c INFO_COL] [--header] [--debug] cmd [vcf]
+  positional arguments:
+    cmd                   vawk command syntax is exactly the same as awk syntax with
+         a few additional features. The INFO field can be split using
+         the I$ prefix and the SAMPLE field can be split using
+         the S$ prefix. For example, I$AF prints the allele frequency of
+         each variant and S$NA12878 prints the entire SAMPLE field for the
+         NA12878 individual for each variant. S$* returns all samples.
+         The SAMPLE field can be further split based on the keys in the
+         FORMAT field of the VCF (column 9). For example, S$NA12877$GT
+         returns the genotype of the NA12877 individual.
+         ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }'
+    vcf                   VCF file (default: stdin)
+  optional arguments:
+    -h, --help            show this help message and exit
+    -v VAR, --var VAR     declare an external variable (e.g.: SIZE=10000)
+    -c INFO_COL, --col INFO_COL
+     column of the INFO field [8]
+    --header              print VCF header
+    --debug               debugging level verbosity
+
+inputs:
+  - id: "#cmd"
+    type: string
+    description: |
+      vawk command syntax is exactly the same as awk syntax with a few
+      additional features. The INFO field can be split using the I$ prefix
+      and the SAMPLE field can be split using the S$ prefix. For example,
+      I$AF prints the allele frequency of each variant and S$NA12878 prints
+      the entire SAMPLE field for the NA12878 individual for each variant.
+      S$* returns all samples. The SAMPLE field can be further split based on
+      the keys in the FORMAT field of the VCF (column 9). For example,
+      S$NA12877$GT returns the genotype of the NA12877 individual.
+      ex: '{ if (I$AF>0.5) print $1,$2,$3,I$AN,S$NA12878,S$NA12877$GT }'
+    inputBinding:
+      position: 1
+
+  - id: "#input"
+    type: File
+    description: |
+      VCF file
+    inputBinding:
+      position: 2
+
+# stdout is redirected to this file and globbed by `#processed` below.
+# The content is whatever `cmd` prints, so it is not necessarily a VCF.
+stdout:
+  "output.vcf"
+
+outputs:
+  - id: "#processed"
+    type: File
+    description: "The resulting VCF file"
+    streamable: true
+    outputBinding:
+      glob: "output.vcf"
+
+baseCommand: ["vawk"]