Skip to content

Commit fcf748e

Browse files
committed
fix unit test
1 parent 4256230 commit fcf748e

File tree

4 files changed

+24
-13
lines changed

4 files changed

+24
-13
lines changed

.github/workflows/python-app.yml

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ jobs:
2727
echo "$HOME/.local/bin" >> $GITHUB_PATH
2828
- name: Display Python version
2929
run: python -c "import sys; print(sys.version)"
30+
- name: Install HDF5
31+
run: sudo apt-get update && sudo apt-get install -y libhdf5-dev
3032
- name: Install dependencies
3133
run: |
3234
python -m pip install --upgrade pip

README.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ Mashpit database can be built using:
5252
A custom database is a collection of genomes based on a provided BioSample accession list.
5353

5454
## Quick start
55-
Here we use a small *Listeria innocua* pathogen detection
5655
#### 1. Build a *Listeria innocua* database:
5756
PDG000000091.9 was versioned on 2022-07-29
5857
```
@@ -160,6 +159,6 @@ mashpit webserver
160159
```
161160
After running this command, a graphical interface will be deployed at 127.0.0.1:8080. Visit that address in your browser to start using the webserver. The webserver allows users to upload a query sample and select a database to query against. The results will be displayed in a table and a tree. A screenshot of the webserver is shown below:
162161

163-
![screenshot](docs/mashpit_webserver.pdf)
162+
![screenshot](https://github.com/tongzhouxu/mashpit/blob/master/docs/img/mashpit_webserver.pdf)
164163

165164
Note that a pre-built database is required to run the webserver. The database can be built using the `mashpit build` command.

pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[build-system]
2-
requires = ["setuptools"]
2+
requires = ["setuptools ~= 75.1.0"]
33
build-backend = "setuptools.build_meta"
44

55
[project]
@@ -16,6 +16,7 @@ requires-python = ">=3.8"
1616
dependencies = [
1717
"sourmash ~= 4.6.1",
1818
"pandas ~= 2.0.3",
19+
"numpy ~= 1.24.4",
1920
"biopython ~= 1.83",
2021
"scikit-bio ~= 0.6.2",
2122
"tqdm ~= 4.67.0",

test/test.py

+19-10
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import hashlib
88
import subprocess
99

10+
from sourmash import load_file_as_signatures, save_signatures
1011
from mashpit.build import create_connection
1112
from mashpit.build import create_database
1213
from mashpit.build import download_metadata
@@ -212,19 +213,24 @@ def test_build_taxonomy(self):
212213
"3dd0e9f5112d633a0bb9ad1300028baabe253e70ed852a208396b317501b08e9"
213214
)
214215
expected_signature_sha = (
215-
"1e7b64deec8f84f0dc0ec765a73e2d9d1e7e0a5f90f85ed148de4f513f4b74ea"
216+
"4dc90abc2935a70fc615ac0111a8a27fc506b25e858a29ec38c5e382790ebab8"
216217
)
217218
hasher = hashlib.sha256()
218219
with open("test_listeria_innocua/test_listeria_innocua.db", "rb") as f:
219220
buf = f.read()
220221
hasher.update(buf)
221222
actual_sqlite_sha = hasher.hexdigest()
222223
hasher = hashlib.sha256()
223-
with open("test_listeria_innocua/test_listeria_innocua.sig", "rb") as f:
224+
database_sig = load_file_as_signatures("test_listeria_innocua/test_listeria_innocua.sig")
225+
database_sig_sorted = sorted(database_sig, key=lambda x: x.name)
226+
with open("test_listeria_innocua/test_listeria_innocua.sig.sorted", "wb") as f:
227+
save_signatures(database_sig_sorted, fp=f)
228+
with open("test_listeria_innocua/test_listeria_innocua.sig.sorted", "rb") as f:
224229
buf = f.read()
225230
hasher.update(buf)
226231
actual_signature_sha = hasher.hexdigest()
227232
self.assertEqual(actual_sqlite_sha, expected_sqlite_sha)
233+
228234
self.assertEqual(actual_signature_sha, expected_signature_sha)
229235
subprocess.run(
230236
["datasets", "download", "genome", "accession", "GCA_022617975.1"]
@@ -241,10 +247,17 @@ def test_build_taxonomy(self):
241247
]
242248
)
243249
expected_sha = (
244-
"0eda85331792619a76f85d9be8ff1a6ddedb509f603fbe9dad4ef9714d19b054"
250+
"82688f582fd95b249f8a9511a243c0b86433897c45ee7d714ea3d5610ac399a9"
245251
)
246252
hasher = hashlib.sha256()
247-
with open("GCA_022617975_output.csv", "rb") as f:
253+
# read the output file
254+
output_file = pd.read_csv("GCA_022617975_output.csv")
255+
# sort the output file by PDS_acc
256+
output_file.sort_values(by=["PDS_acc"], inplace=True)
257+
# drop the first column
258+
output_file.drop(output_file.columns[0], axis=1, inplace=True)
259+
output_file.to_csv("GCA_022617975_output.csv.sorted", index=False)
260+
with open("GCA_022617975_output.csv.sorted", "rb") as f:
248261
buf = f.read()
249262
hasher.update(buf)
250263
actual_sha = hasher.hexdigest()
@@ -266,10 +279,6 @@ def test_build_accession(self):
266279
# generate the test accession file
267280
accession_list = [
268281
"SAMN20822594",
269-
"SAMN20821237",
270-
"SAMN20934490",
271-
"SAMN20977378",
272-
"SAMN20977393",
273282
]
274283
with open("test_accession_list", "w") as f:
275284
for accession in accession_list:
@@ -288,10 +297,10 @@ def test_build_accession(self):
288297
)
289298

290299
expected_sqlite_sha = (
291-
"3912465140b2886b2e1daf5414e42a7d15fb112aed43621ecc3524b823077bf4"
300+
"f14ec35ef299b33d3cbcffe4d4a87e4a79cfb78dac0a5cebabf22e2d72852cfa"
292301
)
293302
expected_signature_sha = (
294-
"96a024342beefae84513ec6ef1ba206846a9cb24e489a069b72c4448139f6d9d"
303+
"2cdf077e256ada00a4d13e1cf5a22be945b713770da81b1de804ef2cc524b10b"
295304
)
296305
hasher = hashlib.sha256()
297306
with open("test_accession/test_accession.db", "rb") as f:

0 commit comments

Comments
 (0)