Skip to content

Commit

Permalink
Update pysam dependency to 0.20.0
Browse files Browse the repository at this point in the history
Fix `pysam.index()` calls which now use samtools 1.16 where
`samtools index input output` doesn't work any more if `output` is the path
of an already existing file, but `samtools index -o output input` works
fine.

Also:
- Pre-create the (empty) index file for mock test datasets as a unit test
  reproducer of the above issue.
- Fix `test_cram` unit test that wasn't checking the return value of
  `CRAM.set_index_file()`, which was in fact returning False because
  `test-data/2.cram` was not sorted (updated now).
  • Loading branch information
nsoranzo committed Jan 1, 2023
1 parent 6c73423 commit a850a47
Show file tree
Hide file tree
Showing 9 changed files with 13 additions and 17 deletions.
10 changes: 5 additions & 5 deletions lib/galaxy/datatypes/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,13 +749,13 @@ def dataset_content_needs_grooming(self, file_name: str) -> bool:
cmd = [
"python",
"-c",
f"import pysam; pysam.set_verbosity(0); pysam.index('{file_name}', '{index_name}')",
f"import pysam; pysam.set_verbosity(0); pysam.index('-o', '{index_name}', '{file_name}')",
]
else:
cmd = [
"python",
"-c",
f"import pysam; pysam.set_verbosity(0); pysam.index('{index_flag}', '{file_name}', '{index_name}')",
f"import pysam; pysam.set_verbosity(0); pysam.index('{index_flag}', '-o', '{index_name}', '{file_name}')",
]
with open(os.devnull, "w") as devnull:
subprocess.check_call(cmd, stderr=devnull, shell=False)
Expand Down Expand Up @@ -786,9 +786,9 @@ def set_meta(
)
if index_flag == "-b":
# IOError: No such file or directory: '-b' if index_flag is set to -b (pysam 0.15.4)
pysam.index(dataset.file_name, index_file.file_name) # type: ignore [attr-defined]
pysam.index("-o", index_file.file_name, dataset.file_name) # type: ignore [attr-defined]
else:
pysam.index(index_flag, dataset.file_name, index_file.file_name) # type: ignore [attr-defined]
pysam.index(index_flag, "-o", index_file.file_name, dataset.file_name) # type: ignore [attr-defined]
dataset.metadata.bam_index = index_file

def sniff(self, filename: str) -> bool:
Expand Down Expand Up @@ -979,7 +979,7 @@ def get_cram_version(self, filename: str) -> Tuple[int, int]:

def set_index_file(self, dataset: "DatasetInstance", index_file) -> bool:
try:
pysam.index(dataset.file_name, index_file.file_name) # type: ignore [attr-defined]
pysam.index("-o", index_file.file_name, dataset.file_name) # type: ignore [attr-defined]
return True
except Exception as exc:
log.warning("%s, set_index_file Exception: %s", self, exc)
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/converters/cram_to_bam_converter.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ python '$__tool_directory__/cram_to_bam.py' '$input' '$output'
<param name="input" ftype="cram" value="2.cram"/>
<output name="output" ftype="bam">
<assert_contents>
<has_size value="57232"/>
<has_size value="60331"/>
</assert_contents>
</output>
</test>
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/dependencies/pinned-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ pyparsing==3.0.9 ; python_version >= "3.7" and python_version < "3.11"
pyreadline3==3.4.1 ; sys_platform == "win32" and python_version >= "3.8" and python_version < "3.11"
pyreadline==2.1 ; sys_platform == "win32" and python_version < "3.8" and python_version >= "3.7"
pyrsistent==0.19.3 ; python_version >= "3.7" and python_version < "3.11"
pysam==0.19.1 ; python_version >= "3.7" and python_version < "3.11"
pysam==0.20.0 ; python_version >= "3.7" and python_version < "3.11"
python-dateutil==2.8.2 ; python_version >= "3.7" and python_version < "3.11"
python-jose==3.3.0 ; python_version >= "3.7" and python_version < "3.11"
python-magic==0.4.27 ; python_version >= "3.7" and python_version < "3.11"
Expand Down
2 changes: 1 addition & 1 deletion packages/app/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ install_requires =
pebble
pulsar-galaxy-lib>=0.15.0.dev0
pydantic
pysam
pysam>=0.20
PyJWT
PyYAML
refgenconf>=0.12.0
Expand Down
2 changes: 1 addition & 1 deletion packages/data/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ install_requires =
pydantic[email]
pylibmagic
python-magic
pysam
pysam>=0.20
rocrate
social-auth-core[openidconnect]==4.0.3
SQLAlchemy>=1.4.25,<2
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ PyJWT = "*"
pykwalify = "*"
pylibmagic = "*"
pyparsing = "*"
pysam = "*"
pysam = ">=0.20"
python = ">=3.7,<3.11" # Keep maximum version strict to appease numpy and scipy
python-dateutil = "*"
python-magic = "*"
Expand Down
Binary file modified test-data/2.cram
Binary file not shown.
6 changes: 1 addition & 5 deletions test/unit/data/datatypes/test_cram.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

import pysam

from galaxy.datatypes.binary import CRAM
Expand All @@ -12,9 +10,7 @@
def test_cram():
c = CRAM()
with get_input_files("2.cram") as input_files, get_dataset(input_files[0], index_attr="cram_index") as dataset:
assert os.path.exists(dataset.metadata.cram_index.file_name) is False
c.set_index_file(dataset=dataset, index_file=dataset.metadata.cram_index)
assert os.path.exists(dataset.metadata.cram_index.file_name) is True
assert c.set_index_file(dataset=dataset, index_file=dataset.metadata.cram_index) is True
c.set_meta(dataset)
pysam.AlignmentFile(dataset.file_name, index_filename=dataset.metadata.cram_index.file_name)
assert dataset.metadata.cram_version == "3.0"
4 changes: 2 additions & 2 deletions test/unit/data/datatypes/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def get_size(self):
@contextmanager
def get_dataset(filename, index_attr="bam_index", dataset_id=1, has_data=True):
dataset = MockDataset(dataset_id)
with get_input_files(filename) as input_files, get_tmp_path() as index_path:
with get_input_files(filename) as input_files, get_tmp_path(should_exist=True) as index_path:
dataset.file_name = input_files[0]
index = MockMetadata()
index.file_name = index_path
Expand Down Expand Up @@ -64,6 +64,6 @@ def get_input_files(*args):
yield test_files
new_md5_sums = [md5_hash_file(f) for f in test_files]
for old_hash, new_hash, f in zip(md5_sums, new_md5_sums, test_files):
assert old_hash == new_hash, "Unexpected change of content for file %s" % f
assert old_hash == new_hash, f"Unexpected change of content for file {f}"
finally:
shutil.rmtree(temp_dir, ignore_errors=True)

0 comments on commit a850a47

Please sign in to comment.