Skip to content

Commit

Permalink
Accommodating pipes in programs.py
Browse files Browse the repository at this point in the history
Adding in docker pipes unit test

Added docs to docker_call and fixed docker_call unit test

Changed datasize for docker_call pipes unit test to 1GB
  • Loading branch information
cmarkello committed Jul 30, 2016
1 parent f3df8ff commit 887292f
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 16 deletions.
106 changes: 91 additions & 15 deletions src/toil_scripts/lib/programs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ def mock_mode():
return True if int(os.environ.get('TOIL_SCRIPTS_MOCK_MODE', '0')) else False


def docker_call(tool,
def docker_call(tool='',
tools=[],
parameters=None,
work_dir='.',
rm=True,
Expand All @@ -25,11 +26,14 @@ def docker_call(tool,
outputs=None,
docker_parameters=None,
check_output=False,
return_stderr=False,
mock=None):
"""
Calls Docker, passing along parameters and tool.
:param str tool: Name of the Docker image to be used (e.g. quay.io/ucsc_cgl/samtools)
:param str tool: Name of the Docker image to be used (e.g. quay.io/ucsc_cgl/samtools)
:param list[str] tools: str list of names of the Docker images and order to be used in
adding piped commands to docker. (e.g. ['quay.io/ucsc_cgl/samtools', 'ubuntu'])
:param list[str] parameters: Command line arguments to be passed to the tool
:param str work_dir: Directory to mount into the container via `-v`. Destination convention is /data
:param bool rm: Set to True to pass `--rm` flag.
Expand All @@ -41,8 +45,33 @@ def docker_call(tool,
or a url. The value is only used if mock=True
:param dict[str,str] docker_parameters: Parameters to pass to docker
:param bool check_output: When True, this function returns docker's output
:param bool return_stderr: When True, this function includes stderr in docker's output
:param bool mock: Whether to run in mock mode. If this variable is unset, its value will be determined by
the environment variable.
Piping docker commands can be done in one of two ways depending on use case:
Running a pipe in docker in 'pipe-in-single-container' mode produces command structure
docker '... | ... | ...' where each '...' command corresponds to each element in the 'parameters'
argument that uses a docker container. This is the most efficient method if you want to run a pipe of
commands where each command uses the same docker container.
Running a pipe in docker in 'pipe-of-containers' mode produces command structure
docker '...' | docker '...' | docker '...' where each '...' command corresponds to each element in
the 'parameters' argument that uses a docker container and each 'docker' tool in the pipe
corresponds to each element in the 'tool' argument
Examples for running command 'head -c 1M </dev/urandom | tee >(md5sum 1>&2) | gzip | gunzip | md5sum 1>&2':
Running 'pipe-in-single-container' mode:
command= ['head -c 1M /dev/urandom | tee >(md5sum 1>&2)', 'gzip', 'gunzip', 'md5sum 1>&2']
work_dir=curr_work_dir
docker_tools=['ubuntu']
stdout = docker_call(work_dir=docker_work_dir, parameters=command, tool=docker_tools, check_output=True)
Running 'pipe-of-containers' mode:
command= ['head -c 1M /dev/urandom | tee >(md5sum 1>&2)', 'gzip', 'gunzip', 'md5sum 1>&2']
work_dir=curr_work_dir
docker_tools=['ubuntu', 'ubuntu', 'ubuntu', 'ubuntu']
stdout = docker_call(work_dir=docker_work_dir, parameters=command, tool=docker_tools, check_output=True)
"""
from toil_scripts.lib.urls import download_url

Expand Down Expand Up @@ -83,36 +112,73 @@ def docker_call(tool,
if env:
for e, v in env.iteritems():
base_docker_call.extend(['-e', '{}={}'.format(e, v)])

if docker_parameters:
base_docker_call += docker_parameters

docker_call = []

run_pipe = False

_log.debug("Calling docker with %s." % " ".join(base_docker_call + [tool] + parameters))

docker_call = base_docker_call + [tool] + parameters
if bool(tools) == bool(tool):
raise Exception('Either "tool" or "tools" must contain a value, but not both.')
if not tools:
tools = [ tool ]
else:
run_pipe = True

# Pipe functionality
# each element in the parameters list must represent a sub-pipe command
shell_flag = True # Flag for running subprocess with string command or list command
if run_pipe:
command_list = []
if len(tools) > 1:
# If tool is a list containing multiple docker container name strings
# then format the docker call in the 'pipe-of-containers' mode
docker_call.extend(base_docker_call + ['--entrypoint /bin/bash', tools[0], '-c \'{}\''.format(parameters[0])])
for i in xrange(1, len(tools)):
docker_call.extend(['|'] + base_docker_call + ['-i --entrypoint /bin/bash', tools[i], '-c \'{}\''.format(parameters[i])])
docker_call = " ".join(docker_call)
_log.debug("Calling docker with %s." % docker_call)

elif len(tools) == 1:
# If tool is a list containing a single docker container name string
# then format the docker call in the 'pipe-in-single-container' mode
docker_call.extend(base_docker_call + ['--entrypoint /bin/bash', tools[0], '-c \'{}\''.format(" | ".join(parameters))])
docker_call = " ".join(docker_call)
_log.debug("Calling docker with %s." % docker_call)

else:
docker_call = " ".join(base_docker_call + tools + parameters)
_log.debug("Calling docker with %s." % docker_call)


try:
if outfile:
subprocess.check_call(docker_call, stdout=outfile)
subprocess.check_call(docker_call, stdout=outfile, shell=shell_flag)
elif check_output and return_stderr:
return subprocess.check_output(docker_call, shell=shell_flag, stderr=subprocess.STDOUT)
elif check_output:
return subprocess.check_output(docker_call, shell=shell_flag)
elif return_stderr:
return subprocess.check_call(docker_call, stderr=subprocess.STDOUT, shell=shell_flag)
else:
if check_output:
return subprocess.check_output(docker_call)
else:
subprocess.check_call(docker_call)
subprocess.check_call(docker_call, shell=shell_flag)
# Fix root ownership of output files
except:
# Panic avoids hiding the exception raised in the try block
with panic():
_fix_permissions(base_docker_call, tool, work_dir)
_fix_permissions(base_docker_call, tools, work_dir)
else:
_fix_permissions(base_docker_call, tool, work_dir)
_fix_permissions(base_docker_call, tools, work_dir)

for filename in outputs.keys():
if not os.path.isabs(filename):
filename = os.path.join(work_dir, filename)
assert(os.path.isfile(filename))


def _fix_permissions(base_docker_call, tool, work_dir):
def _fix_permissions(base_docker_call, tools, work_dir):
"""
Fix permission of a mounted Docker directory by reusing the tool
Expand All @@ -122,5 +188,15 @@ def _fix_permissions(base_docker_call, tool, work_dir):
"""
base_docker_call.append('--entrypoint=chown')
stat = os.stat(work_dir)
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
subprocess.check_call(command)
command = []
command_list = []
for tool in tools:
command = base_docker_call + [tool] + ['-R', '{}:{}'.format(stat.st_uid, stat.st_gid), '/data']
command_list.append(command)

for command in command_list:
subprocess.check_call(command)




22 changes: 21 additions & 1 deletion src/toil_scripts/lib/test/test_programs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os

import re

def test_docker_call(tmpdir):
from toil_scripts.lib.programs import docker_call
Expand All @@ -12,3 +12,23 @@ def test_docker_call(tmpdir):
with open(fpath, 'w') as f:
docker_call(tool='ubuntu', env=dict(foo='bar'), parameters=['printenv', 'foo'], outfile=f)
assert open(fpath).read() == 'bar\n'

# Test pipe functionality
# download ubuntu docker image
docker_call(work_dir=work_dir, tool="ubuntu")
command1 = ['head -c 1G /dev/urandom | tee /data/first', 'gzip', 'gunzip', 'md5sum 1>&2']
command2 = ['md5sum /data/first 1>&2']
# Test 'pipe-in-single-container' mode
docker_tools1=['ubuntu']
stdout1 = docker_call(work_dir=work_dir, parameters=command1, tools=docker_tools1, check_output=True, return_stderr=True)
stdout2 = docker_call(work_dir=work_dir, parameters=command2, tool='ubuntu', check_output=True, return_stderr=True)
test1 = re.findall(r"([a-fA-F\d]{32})", stdout1)
test2 = re.findall(r"([a-fA-F\d]{32})", stdout2)
assert test1[0] == test2[0]
# Test 'pipe-of-containers' mode
docker_tools2=['ubuntu', 'ubuntu', 'ubuntu', 'ubuntu']
stdout1 = docker_call(work_dir=work_dir, parameters=command1, tools=docker_tools2, check_output=True, return_stderr=True)
stdout2 = docker_call(work_dir=work_dir, parameters=command2, tool='ubuntu', check_output=True, return_stderr=True)
test1 = re.findall(r"([a-fA-F\d]{32})", stdout1)
test2 = re.findall(r"([a-fA-F\d]{32})", stdout2)
assert test1[0] == test2[0]

0 comments on commit 887292f

Please sign in to comment.