Skip to content

Commit

Permalink
Merge pull request #91 from EliLillyCo/release/1.2.0
Browse files Browse the repository at this point in the history
Release/1.2.0
  • Loading branch information
michaeltneylon authored Dec 4, 2019
2 parents 6331ac3 + 8e469a8 commit 30d4af9
Show file tree
Hide file tree
Showing 44 changed files with 12,372 additions and 196 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ docs/source/modules.rst
docs/source/pytest_wdl.rst
docs/source/pytest_wdl.data_types.rst
docs/source/pytest_wdl.executors.rst
docs/source/pytest_wdl.url_schemes.rst

# Environments
.env
Expand Down
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ addons:
apt:
packages:
- openjdk-8-jdk
- docker-ce
services:
- docker
cache:
Expand All @@ -13,6 +14,7 @@ cache:
python:
- '3.6'
- '3.7'
#- '3.8'
install:
- pip install --upgrade pip wheel
- pip install -r requirements.txt
Expand Down
11 changes: 11 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changes

## v1.2.0 (2019.12.04)

* Fix #86 - enable the test_data.json file to be located in the same directory as the WDL file
* When comparing BAM files, by default only compare HD, SQ, and RG headers
* Enhance the error message that is displayed when a workflow fails
* Add ability to validate data file digests
* Optionally show progress bar when downloading data file
* Update miniwdl minimum version to 0.5.2, and update the miniwdl executor to use `docker swarm`
* Update xphyle minimum version to 4.1.3
* Other bugfixes

## v1.1.1 (2019.09.27)

* Fixes the `license` entry in `setup.py` so that it renders properly when releasing to PyPI.
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
[![Code Coverage](https://codecov.io/gh/elilillyco/pytest-wdl/branch/master/graph/badge.svg)](https://codecov.io/gh/elilillyco/pytest-wdl)
[![Documentation Status](https://readthedocs.org/projects/pytest-wdl/badge/?version=latest)](https://pytest-wdl.readthedocs.io/en/latest/?badge=latest)

<img width="200" alt="logo" src="docs/source/logo.png"/>

This package is a plugin for the [pytest](https://docs.pytest.org/en/latest/) unit testing framework that enables testing of workflows written in [Workflow Description Language](https://github.com/openwdl).

## Dependencies

* Python 3.6+
* Python 3.6 or 3.7 (3.8 is not yet fully supported)
* Java 1.8+
* [Cromwell](https://github.com/broadinstitute/cromwell/releases/tag/38) JAR file
* [Docker](https://www.docker.com/get-started) daemon (if your WDL tasks depend on Docker images)
Expand Down
2 changes: 2 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
.. image:: logo.png

Welcome to pytest-wdl's documentation!
======================================

Expand Down
Binary file added docs/source/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions docs/source/manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ As a short-cut, the "class" attribute can be omitted and the map describing the
"url": "http://example.com/my.bam",
"http_headers": {
"auth_token": "TOKEN"
},
"digests": {
"md5": "8db3048a86e16a08d2d8341d1c72fecb"
}
},
"reference": {
Expand Down Expand Up @@ -140,6 +143,7 @@ The available keys for configuring file inputs/outputs are:
* `env`: The name of an environment variable in which to look up the header value.
* `value`: The header value; only used if an environment variable is not specified or is unset.
* `contents`: The contents of the file, specified as a string. The file is written to `path` the first time it is requested.
* `digests`: Optional mapping of hash algorithm name to digest. These are digests that have been computed on the remote file and are used to validate the downloaded file. Currently only used for files resolved from URLs.

In addition, the following keys are recognized for output files only:

Expand Down
2 changes: 2 additions & 0 deletions pytest_wdl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
module.
"""
from pytest_wdl import fixtures
from pytest_wdl.executors import ExecutionFailedError

import pytest


Expand Down
6 changes: 2 additions & 4 deletions pytest_wdl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import tempfile
from typing import Dict, List, Optional, Union

from xphyle import open_

from pytest_wdl.utils import ensure_path, env_map


Expand Down Expand Up @@ -80,7 +78,7 @@ def __init__(
executor_defaults: Optional[Dict[str, dict]] = None,
):
if config_file:
with open_(config_file, "rt") as inp:
with open(config_file, "rt") as inp:
defaults = json.load(inp)
else:
defaults = {}
Expand Down Expand Up @@ -160,4 +158,4 @@ def cleanup(self) -> None:
`self.remove_cache_dir` is True.
"""
if self.remove_cache_dir:
shutil.rmtree(self.cache_dir)
shutil.rmtree(self.cache_dir, ignore_errors=True)
43 changes: 24 additions & 19 deletions pytest_wdl/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,23 +101,27 @@ def __init__(self, data_descriptors: dict, user_config: UserConfiguration):
self.user_config = user_config

def resolve(self, name: str, datadirs: Optional[DataDirs] = None):
if name not in self.data_descriptors:
raise ValueError(f"Unrecognized name {name}")

value = self.data_descriptors[name]

if isinstance(value, dict):
# Right now, "class" is just a marker for object types, of which
# "file" is a special case.
cls = value.get("class", "file")
if "value" in value:
value = value["value"]
if cls == "file":
return create_data_file(
user_config=self.user_config,
datadirs=datadirs,
**cast(dict, value)
)
if name in self.data_descriptors:
value = self.data_descriptors[name]

if isinstance(value, dict):
# Right now, "class" is just a marker for object types, of which
# "file" is a special case.
cls = value.get("class", "file")
if "value" in value:
value = value["value"]
if cls == "file":
value = create_data_file(
user_config=self.user_config,
datadirs=datadirs,
**cast(dict, value)
)
else:
value = create_data_file(
name=name,
user_config=self.user_config,
datadirs=datadirs
)

return value

Expand Down Expand Up @@ -167,8 +171,9 @@ def create_data_file(
url: Optional[str] = None,
contents: Optional[Union[str, dict]] = None,
env: Optional[str] = None,
datadirs: Optional[DataDirs] = None,
http_headers: Optional[dict] = None,
digests: Optional[dict] = None,
datadirs: Optional[DataDirs] = None,
**kwargs
) -> DataFile:
if isinstance(type, dict):
Expand All @@ -193,7 +198,7 @@ def create_data_file(
else:
localizer = LinkLocalizer(env_path)
elif url:
localizer = UrlLocalizer(url, user_config, http_headers)
localizer = UrlLocalizer(url, user_config, http_headers, digests)
if not local_path:
if name:
local_path = ensure_path(user_config.cache_dir / name)
Expand Down
30 changes: 11 additions & 19 deletions pytest_wdl/data_types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta, abstractmethod
import hashlib
from pathlib import Path
from typing import Callable, Optional, Union, cast

import subby
from xphyle import open_

from pytest_wdl.localizers import Localizer
from pytest_wdl.utils import tempdir
from pytest_wdl.utils import tempdir, compare_files_with_hash
from xphyle import guess_file_format
from xphyle.utils import transcode_file

Expand Down Expand Up @@ -56,7 +54,13 @@ def __init__(
@property
def path(self) -> Path:
if not self.local_path.exists():
self.localizer.localize(self.local_path)
if self.localizer:
self.localizer.localize(self.local_path)
else:
raise RuntimeError(
f"Localization to {self.local_path} is required but no localizer "
f"is defined"
)
return self.local_path

def __str__(self) -> str:
Expand Down Expand Up @@ -173,7 +177,7 @@ def assert_text_files_equal(
def compare_gzip(file1: Path, file2: Path):
crc_size1 = subby.sub(f"gzip -lv {file1} | tail -1 | awk '{{print $2\":\"$7}}'")
crc_size2 = subby.sub(f"gzip -lv {file2} | tail -1 | awk '{{print $2\":\"$7}}'")
if crc_size1 != crc_size2:
if crc_size1 != crc_size2: # TODO: test this
raise AssertionError(
f"CRCs and/or uncompressed sizes differ between expected identical "
f"gzip files {file1}, {file2}"
Expand All @@ -187,21 +191,9 @@ def compare_gzip(file1: Path, file2: Path):
}


def assert_binary_files_equal(
file1: Path,
file2: Path,
hash_fn: Callable[[bytes], hashlib._hashlib.HASH] = hashlib.md5
) -> None:
def assert_binary_files_equal(file1: Path, file2: Path, digest: str = "md5") -> None:
fmt = guess_file_format(file1)
if fmt and fmt in BINARY_COMPARATORS:
BINARY_COMPARATORS[fmt](file1, file2)
else:
with open_(file1, "rb") as inp1:
file1_md5 = hash_fn(inp1.read()).hexdigest()
with open_(file2, "rb") as inp2:
file2_md5 = hash_fn(inp2.read()).hexdigest()
if file1_md5 != file2_md5:
raise AssertionError(
f"MD5 hashes differ between expected identical files "
f"{file1}, {file2}"
)
compare_files_with_hash(file1, file2, digest)
47 changes: 25 additions & 22 deletions pytest_wdl/data_types/bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@
from functools import partial
from pathlib import Path
import re
from typing import Optional
from typing import Iterable, Optional

import subby
from xphyle import open_

from pytest_wdl.data_types import DataFile, assert_text_files_equal, diff_default
from pytest_wdl.utils import tempdir

try: # pragma: no-cover
# TODO: fall back to command line samtools (if installed)
try:
import pysam
except ImportError:
except ImportError: # pragma: no-cover
raise ImportError(
"Failed to import dependencies for bam type. To add support for BAM files, "
"install the plugin with pip install pytest-wdl[bam]"
Expand Down Expand Up @@ -109,13 +109,13 @@ def assert_bam_files_equal(
bam_to_sam(
file1,
cmp_file1,
headers=False,
headers=None,
sorting=Sorting.NAME
)
bam_to_sam(
file2,
cmp_file2,
headers=False,
headers=None,
sorting=Sorting.NAME
)
assert_text_files_equal(
Expand All @@ -131,14 +131,12 @@ def assert_bam_files_equal(
bam_to_sam(
file1,
cmp_file1,
headers=True,
min_mapq=min_mapq,
sorting=Sorting.COORDINATE,
)
bam_to_sam(
file2,
cmp_file2,
headers=True,
min_mapq=min_mapq,
sorting=Sorting.COORDINATE
)
Expand All @@ -157,7 +155,7 @@ def assert_bam_files_equal(
def bam_to_sam(
input_bam: Path,
output_sam: Path,
headers: bool = True,
headers: Optional[Iterable[str]] = ("HD", "SQ", "RG"),
min_mapq: Optional[int] = None,
sorting: Sorting = Sorting.NONE
):
Expand All @@ -167,34 +165,39 @@ def bam_to_sam(
opts = []
if headers:
opts.append("-h")
headers = set(headers)
if min_mapq:
opts.extend(["-q", str(min_mapq)])
sam = pysam.view(*opts, str(input_bam)).rstrip()
# Replace any randomly assigned readgroups with a common placeholder
sam = re.sub(r"UNSET-\w*\b", "UNSET-placeholder", sam)

lines = sam.splitlines(keepends=True)
header_lines = []
start = 0
if headers:
for i, line in enumerate(lines):
if not line.startswith("@"):
start = i
break
elif line[1:3] in headers:
header_lines.append(line)

body_lines = lines[start:]
if sorting is not Sorting.NONE:
lines = sam.splitlines(keepends=True)
start = 0
if headers:
for i, line in enumerate(lines):
if not line.startswith("@"):
start = i
break

with tempdir() as temp:
temp_sam = temp / f"output_{str(output_sam.stem)}.sam"
with open_(temp_sam, "w") as out:
out.write("".join(lines[start:]))
with open(temp_sam, "w") as out:
out.write("".join(body_lines))
if sorting is Sorting.COORDINATE:
sort_cols = "-k3,3 -k4,4n -k2,2n"
else:
sort_cols = "-k1,1 -k2,2n"
sorted_sam = subby.sub(f"cat {str(temp_sam)} | sort {sort_cols}")
lines = lines[:start] + [sorted_sam]
body_lines = [sorted_sam]

with open_(output_sam, "w") as out:
out.write("".join(lines))
with open(output_sam, "w") as out:
out.write("".join(header_lines + body_lines))


def diff_bam_columns(file1: Path, file2: Path, columns: str) -> int:
Expand Down
6 changes: 2 additions & 4 deletions pytest_wdl/data_types/json.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
import json
from pathlib import Path

from xphyle import open_

from pytest_wdl.data_types import DataFile


class JsonDataFile(DataFile):
def _assert_contents_equal(self, other_path: Path, other_opts: dict) -> None:
with open_(self.path, "rt") as inp:
with open(self.path, "rt") as inp:
try:
j1 = json.load(inp)
except json.decoder.JSONDecodeError:
raise AssertionError(f"Invalid JSON file {self.path}")
with open_(other_path, "rt") as inp:
with open(other_path, "rt") as inp:
try:
j2 = json.load(inp)
except json.decoder.JSONDecodeError:
Expand Down
3 changes: 1 addition & 2 deletions pytest_wdl/data_types/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import re

import subby
from xphyle import open_

from pytest_wdl.data_types import DataFile, assert_text_files_equal, diff_default
from pytest_wdl.utils import tempdir
Expand Down Expand Up @@ -56,7 +55,7 @@ def diff_vcf_columns(file1: Path, file2: Path, compare_phase: bool = False) -> i
def make_comparable(infile, outfile):
cmd = ["grep -vE '^#'", "cut -f 1-5,7,10", "cut -d ':' -f 1"]
output = subby.sub(cmd, stdin=infile)
with open_(outfile, "wt") as out:
with open(outfile, "wt") as out:
if compare_phase:
out.write(output)
else:
Expand Down
Loading

0 comments on commit 30d4af9

Please sign in to comment.