Skip to content

Commit

Permalink
Merge pull request #91 from EliLillyCo/release/1.2.0
Browse files Browse the repository at this point in the history
Release/1.2.0
  • Loading branch information
michaeltneylon authored Dec 4, 2019
2 parents 6331ac3 + 8e469a8 commit 30d4af9
Show file tree
Hide file tree
Showing 44 changed files with 12,372 additions and 196 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ docs/source/modules.rst
docs/source/pytest_wdl.rst
docs/source/pytest_wdl.data_types.rst
docs/source/pytest_wdl.executors.rst
docs/source/pytest_wdl.url_schemes.rst

# Environments
.env
Expand Down
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ addons:
apt:
packages:
- openjdk-8-jdk
- docker-ce
services:
- docker
cache:
Expand All @@ -13,6 +14,7 @@ cache:
python:
- '3.6'
- '3.7'
#- '3.8'
install:
- pip install --upgrade pip wheel
- pip install -r requirements.txt
Expand Down
11 changes: 11 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changes

## v1.2.0 (2019.12.04)

* Fix #86 - enable the test_data.json file to be located in the same directory as the WDL file
* When comparing BAM files, by default only compare HD, SQ, and RG headers
* Enhance the error message that is displayed when a workflow fails
* Add ability to validate data file digests
* Optionally show progress bar when downloading data file
* Update miniwdl minimum version to 0.5.2, and update the miniwdl executor to use `docker swarm`
* Update xphyle minimum version to 4.1.3
* Other bugfixes

## v1.1.1 (2019.09.27)

* Fixes the `license` entry in `setup.py` so that it renders properly when releasing to PyPI.
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
[![Code Coverage](https://codecov.io/gh/elilillyco/pytest-wdl/branch/master/graph/badge.svg)](https://codecov.io/gh/elilillyco/pytest-wdl)
[![Documentation Status](https://readthedocs.org/projects/pytest-wdl/badge/?version=latest)](https://pytest-wdl.readthedocs.io/en/latest/?badge=latest)

<img width="200" alt="logo" src="docs/source/logo.png"/>

This package is a plugin for the [pytest](https://docs.pytest.org/en/latest/) unit testing framework that enables testing of workflows written in [Workflow Description Language](https://github.com/openwdl).

## Dependencies

* Python 3.6+
* Python 3.6 or 3.7 (3.8 is not yet fully supported)
* Java 1.8+
* [Cromwell](https://github.com/broadinstitute/cromwell/releases/tag/38) JAR file
* [Docker](https://www.docker.com/get-started) daemon (if your WDL tasks depend on Docker images)
Expand Down
2 changes: 2 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
.. image:: logo.png

Welcome to pytest-wdl's documentation!
======================================

Expand Down
Binary file added docs/source/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions docs/source/manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ As a short-cut, the "class" attribute can be omitted and the map describing the
"url": "http://example.com/my.bam",
"http_headers": {
"auth_token": "TOKEN"
},
"digests": {
"md5": "8db3048a86e16a08d2d8341d1c72fecb"
}
},
"reference": {
Expand Down Expand Up @@ -140,6 +143,7 @@ The available keys for configuring file inputs/outputs are:
* `env`: The name of an environment variable in which to look up the header value.
* `value`: The header value; only used if an environment variable is not specified or is unset.
* `contents`: The contents of the file, specified as a string. The file is written to `path` the first time it is requested.
* `digests`: Optional mapping of hash algorithm name to digest. These are digests that have been computed on the remote file and are used to validate the downloaded file. Currently only used for files resolved from URLs.

In addition, the following keys are recognized for output files only:

Expand Down
2 changes: 2 additions & 0 deletions pytest_wdl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
module.
"""
from pytest_wdl import fixtures
from pytest_wdl.executors import ExecutionFailedError

import pytest


Expand Down
6 changes: 2 additions & 4 deletions pytest_wdl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import tempfile
from typing import Dict, List, Optional, Union

from xphyle import open_

from pytest_wdl.utils import ensure_path, env_map


Expand Down Expand Up @@ -80,7 +78,7 @@ def __init__(
executor_defaults: Optional[Dict[str, dict]] = None,
):
if config_file:
with open_(config_file, "rt") as inp:
with open(config_file, "rt") as inp:
defaults = json.load(inp)
else:
defaults = {}
Expand Down Expand Up @@ -160,4 +158,4 @@ def cleanup(self) -> None:
`self.remove_cache_dir` is True.
"""
if self.remove_cache_dir:
shutil.rmtree(self.cache_dir)
shutil.rmtree(self.cache_dir, ignore_errors=True)
43 changes: 24 additions & 19 deletions pytest_wdl/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,23 +101,27 @@ def __init__(self, data_descriptors: dict, user_config: UserConfiguration):
self.user_config = user_config

def resolve(self, name: str, datadirs: Optional[DataDirs] = None):
if name not in self.data_descriptors:
raise ValueError(f"Unrecognized name {name}")

value = self.data_descriptors[name]

if isinstance(value, dict):
# Right now, "class" is just a marker for object types, of which
# "file" is a special case.
cls = value.get("class", "file")
if "value" in value:
value = value["value"]
if cls == "file":
return create_data_file(
user_config=self.user_config,
datadirs=datadirs,
**cast(dict, value)
)
if name in self.data_descriptors:
value = self.data_descriptors[name]

if isinstance(value, dict):
# Right now, "class" is just a marker for object types, of which
# "file" is a special case.
cls = value.get("class", "file")
if "value" in value:
value = value["value"]
if cls == "file":
value = create_data_file(
user_config=self.user_config,
datadirs=datadirs,
**cast(dict, value)
)
else:
value = create_data_file(
name=name,
user_config=self.user_config,
datadirs=datadirs
)

return value

Expand Down Expand Up @@ -167,8 +171,9 @@ def create_data_file(
url: Optional[str] = None,
contents: Optional[Union[str, dict]] = None,
env: Optional[str] = None,
datadirs: Optional[DataDirs] = None,
http_headers: Optional[dict] = None,
digests: Optional[dict] = None,
datadirs: Optional[DataDirs] = None,
**kwargs
) -> DataFile:
if isinstance(type, dict):
Expand All @@ -193,7 +198,7 @@ def create_data_file(
else:
localizer = LinkLocalizer(env_path)
elif url:
localizer = UrlLocalizer(url, user_config, http_headers)
localizer = UrlLocalizer(url, user_config, http_headers, digests)
if not local_path:
if name:
local_path = ensure_path(user_config.cache_dir / name)
Expand Down
30 changes: 11 additions & 19 deletions pytest_wdl/data_types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta, abstractmethod
import hashlib
from pathlib import Path
from typing import Callable, Optional, Union, cast

import subby
from xphyle import open_

from pytest_wdl.localizers import Localizer
from pytest_wdl.utils import tempdir
from pytest_wdl.utils import tempdir, compare_files_with_hash
from xphyle import guess_file_format
from xphyle.utils import transcode_file

Expand Down Expand Up @@ -56,7 +54,13 @@ def __init__(
@property
def path(self) -> Path:
if not self.local_path.exists():
self.localizer.localize(self.local_path)
if self.localizer:
self.localizer.localize(self.local_path)
else:
raise RuntimeError(
f"Localization to {self.local_path} is required but no localizer "
f"is defined"
)
return self.local_path

def __str__(self) -> str:
Expand Down Expand Up @@ -173,7 +177,7 @@ def assert_text_files_equal(
def compare_gzip(file1: Path, file2: Path):
crc_size1 = subby.sub(f"gzip -lv {file1} | tail -1 | awk '{{print $2\":\"$7}}'")
crc_size2 = subby.sub(f"gzip -lv {file2} | tail -1 | awk '{{print $2\":\"$7}}'")
if crc_size1 != crc_size2:
if crc_size1 != crc_size2: # TODO: test this
raise AssertionError(
f"CRCs and/or uncompressed sizes differ between expected identical "
f"gzip files {file1}, {file2}"
Expand All @@ -187,21 +191,9 @@ def compare_gzip(file1: Path, file2: Path):
}


def assert_binary_files_equal(
file1: Path,
file2: Path,
hash_fn: Callable[[bytes], hashlib._hashlib.HASH] = hashlib.md5
) -> None:
def assert_binary_files_equal(file1: Path, file2: Path, digest: str = "md5") -> None:
fmt = guess_file_format(file1)
if fmt and fmt in BINARY_COMPARATORS:
BINARY_COMPARATORS[fmt](file1, file2)
else:
with open_(file1, "rb") as inp1:
file1_md5 = hash_fn(inp1.read()).hexdigest()
with open_(file2, "rb") as inp2:
file2_md5 = hash_fn(inp2.read()).hexdigest()
if file1_md5 != file2_md5:
raise AssertionError(
f"MD5 hashes differ between expected identical files "
f"{file1}, {file2}"
)
compare_files_with_hash(file1, file2, digest)
47 changes: 25 additions & 22 deletions pytest_wdl/data_types/bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@
from functools import partial
from pathlib import Path
import re
from typing import Optional
from typing import Iterable, Optional

import subby
from xphyle import open_

from pytest_wdl.data_types import DataFile, assert_text_files_equal, diff_default
from pytest_wdl.utils import tempdir

try: # pragma: no-cover
# TODO: fall back to command line samtools (if installed)
try:
import pysam
except ImportError:
except ImportError: # pragma: no-cover
raise ImportError(
"Failed to import dependencies for bam type. To add support for BAM files, "
"install the plugin with pip install pytest-wdl[bam]"
Expand Down Expand Up @@ -109,13 +109,13 @@ def assert_bam_files_equal(
bam_to_sam(
file1,
cmp_file1,
headers=False,
headers=None,
sorting=Sorting.NAME
)
bam_to_sam(
file2,
cmp_file2,
headers=False,
headers=None,
sorting=Sorting.NAME
)
assert_text_files_equal(
Expand All @@ -131,14 +131,12 @@ def assert_bam_files_equal(
bam_to_sam(
file1,
cmp_file1,
headers=True,
min_mapq=min_mapq,
sorting=Sorting.COORDINATE,
)
bam_to_sam(
file2,
cmp_file2,
headers=True,
min_mapq=min_mapq,
sorting=Sorting.COORDINATE
)
Expand All @@ -157,7 +155,7 @@ def assert_bam_files_equal(
def bam_to_sam(
input_bam: Path,
output_sam: Path,
headers: bool = True,
headers: Optional[Iterable[str]] = ("HD", "SQ", "RG"),
min_mapq: Optional[int] = None,
sorting: Sorting = Sorting.NONE
):
Expand All @@ -167,34 +165,39 @@ def bam_to_sam(
opts = []
if headers:
opts.append("-h")
headers = set(headers)
if min_mapq:
opts.extend(["-q", str(min_mapq)])
sam = pysam.view(*opts, str(input_bam)).rstrip()
# Replace any randomly assigned readgroups with a common placeholder
sam = re.sub(r"UNSET-\w*\b", "UNSET-placeholder", sam)

lines = sam.splitlines(keepends=True)
header_lines = []
start = 0
if headers:
for i, line in enumerate(lines):
if not line.startswith("@"):
start = i
break
elif line[1:3] in headers:
header_lines.append(line)

body_lines = lines[start:]
if sorting is not Sorting.NONE:
lines = sam.splitlines(keepends=True)
start = 0
if headers:
for i, line in enumerate(lines):
if not line.startswith("@"):
start = i
break

with tempdir() as temp:
temp_sam = temp / f"output_{str(output_sam.stem)}.sam"
with open_(temp_sam, "w") as out:
out.write("".join(lines[start:]))
with open(temp_sam, "w") as out:
out.write("".join(body_lines))
if sorting is Sorting.COORDINATE:
sort_cols = "-k3,3 -k4,4n -k2,2n"
else:
sort_cols = "-k1,1 -k2,2n"
sorted_sam = subby.sub(f"cat {str(temp_sam)} | sort {sort_cols}")
lines = lines[:start] + [sorted_sam]
body_lines = [sorted_sam]

with open_(output_sam, "w") as out:
out.write("".join(lines))
with open(output_sam, "w") as out:
out.write("".join(header_lines + body_lines))


def diff_bam_columns(file1: Path, file2: Path, columns: str) -> int:
Expand Down
6 changes: 2 additions & 4 deletions pytest_wdl/data_types/json.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
import json
from pathlib import Path

from xphyle import open_

from pytest_wdl.data_types import DataFile


class JsonDataFile(DataFile):
def _assert_contents_equal(self, other_path: Path, other_opts: dict) -> None:
with open_(self.path, "rt") as inp:
with open(self.path, "rt") as inp:
try:
j1 = json.load(inp)
except json.decoder.JSONDecodeError:
raise AssertionError(f"Invalid JSON file {self.path}")
with open_(other_path, "rt") as inp:
with open(other_path, "rt") as inp:
try:
j2 = json.load(inp)
except json.decoder.JSONDecodeError:
Expand Down
3 changes: 1 addition & 2 deletions pytest_wdl/data_types/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import re

import subby
from xphyle import open_

from pytest_wdl.data_types import DataFile, assert_text_files_equal, diff_default
from pytest_wdl.utils import tempdir
Expand Down Expand Up @@ -56,7 +55,7 @@ def diff_vcf_columns(file1: Path, file2: Path, compare_phase: bool = False) -> i
def make_comparable(infile, outfile):
cmd = ["grep -vE '^#'", "cut -f 1-5,7,10", "cut -d ':' -f 1"]
output = subby.sub(cmd, stdin=infile)
with open_(outfile, "wt") as out:
with open(outfile, "wt") as out:
if compare_phase:
out.write(output)
else:
Expand Down
Loading

0 comments on commit 30d4af9

Please sign in to comment.