Skip to content

Commit 680eaca

Browse files
committed
modified saturation function to use uncorrected distances — calculated as one minus the pairwise identity — rather than pairwise identity for the linear regression analysis
1 parent ba7393e commit 680eaca

File tree

5 files changed

+92
-86
lines changed

5 files changed

+92
-86
lines changed

change_log.txt

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
Major changes to PhyKIT are summarized here.
22

3+
1.19.3
4+
- Saturation function now uses uncorrected distances instead of pairwise identities
5+
36
1.19.2
47
- Verbose pairwise identity reporting separates pairwise identities by tabs and not a dash
58

docs/change_log/index.rst

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ Change log
88

99
Major changes to PhyKIT are summarized here.
1010

11+
**1.19.3**:
12+
Saturation function now uses uncorrected distances instead of pairwise identities
13+
1114
**1.19.2**:
1215
Verbose pairwise identity reporting separates pairwise identities by tabs and not a dash
1316

phykit/services/tree/saturation.py

+16-15
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,17 @@ def run(self):
4040
# distances and pairwise identities
4141
(
4242
patristic_distances,
43-
pairwise_identities,
43+
uncorrected_distances,
4444
) = self.loop_through_combos_and_calculate_pds_and_pis(combos, alignment, tree)
4545

4646
# calculate linear regression
4747
_, _, r_value, _, _ = scipy.stats.linregress(
48-
pairwise_identities, patristic_distances
48+
uncorrected_distances, patristic_distances
4949
)
5050

5151
# report res
5252
self.print_res(
53-
self.verbose, combos, pairwise_identities, patristic_distances, r_value
53+
self.verbose, combos, uncorrected_distances, patristic_distances, r_value
5454
)
5555

5656
def process_args(self, args):
@@ -68,19 +68,20 @@ def loop_through_combos_and_calculate_pds_and_pis(
6868
their patristic distance and pairwise identity
6969
"""
7070
patristic_distances = []
71-
pairwise_identities = []
71+
uncorrected_distances = []
7272
aln_len = alignment.get_alignment_length()
7373
for combo in combos:
7474
# calculate pd
7575
patristic_distances.append(tree.distance(combo[0], combo[1]))
7676
# calculate pairwise identity
77-
pairwise_identities = self.calculate_pairwise_identities(
78-
alignment, pairwise_identities, aln_len, combo
77+
uncorrected_distances = self.calculate_uncorrected_distances(
78+
alignment, uncorrected_distances, aln_len, combo
7979
)
80-
return patristic_distances, pairwise_identities
8180

82-
def calculate_pairwise_identities(
83-
self, alignment, pairwise_identities: list, aln_len: int, combo: tuple
81+
return patristic_distances, uncorrected_distances
82+
83+
def calculate_uncorrected_distances(
84+
self, alignment, uncorrected_distances: list, aln_len: int, combo: tuple
8485
) -> list:
8586
"""
8687
calculate pairwise identities for a given combo
@@ -96,15 +97,15 @@ def calculate_pairwise_identities(
9697
for idx in range(0, aln_len):
9798
if seq_one[idx] == seq_two[idx]:
9899
identities += 1
99-
pairwise_identities.append(identities / aln_len)
100+
uncorrected_distances.append(1-(identities / aln_len))
100101

101-
return pairwise_identities
102+
return uncorrected_distances
102103

103104
def print_res(
104105
self,
105106
verbose: bool,
106107
combos: list,
107-
pairwise_identities: list,
108+
uncorrected_distances: list,
108109
patristic_distances: list,
109110
r_value: float,
110111
) -> None:
@@ -113,11 +114,11 @@ def print_res(
113114
"""
114115
try:
115116
if verbose:
116-
for combo, pairwise_identity, patristic_distance in zip(
117-
combos, pairwise_identities, patristic_distances
117+
for combo, dist, patristic_distance in zip(
118+
combos, uncorrected_distances, patristic_distances
118119
):
119120
print(
120-
f"{combo[0]}-{combo[1]}\t{round(pairwise_identity,4)}\t{round(patristic_distance, 4)}"
121+
f"{combo[0]}-{combo[1]}\t{round(dist,4)}\t{round(patristic_distance, 4)}"
121122
)
122123
else:
123124
print(round(r_value**2, 4))

phykit/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.19.2"
1+
__version__ = "1.19.3"

tests/integration/tree/test_saturation_integration.py

+69-70
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def test_saturation_incorrect_tree_path(self, mocked_print):
4848
"phykit",
4949
"saturation",
5050
"-t",
51-
f"{here.parent.parent.parent}/sample_files/12_YPR191W_Anc_7.548_codon_aln.fasta.clipkit.treefi",
51+
f"{here.parent.parent.parent}/sample_files/does_not_exist",
5252
"-a",
5353
f"{here.parent.parent.parent}/sample_files/12_YPR191W_Anc_7.548_codon_aln.fasta.clipkit",
5454
]
@@ -67,7 +67,7 @@ def test_saturation_incorrect_alignment_path(self, mocked_print):
6767
"-t",
6868
f"{here.parent.parent.parent}/sample_files/12_YPR191W_Anc_7.548_codon_aln.fasta.clipkit.treefile",
6969
"-a",
70-
f"{here.parent.parent.parent}/sample_files/12_YPR191W_Anc_7.548_codon_aln.fasta.clipki",
70+
f"{here.parent.parent.parent}/sample_files/does_not_exist",
7171
]
7272

7373
with pytest.raises(SystemExit) as pytest_wrapped_e:
@@ -78,7 +78,6 @@ def test_saturation_incorrect_alignment_path(self, mocked_print):
7878

7979
@patch("builtins.print")
8080
def test_saturation_verbose(self, mocked_print):
81-
expected_result = 0.8451
8281
testargs = [
8382
"phykit",
8483
"sat",
@@ -91,70 +90,70 @@ def test_saturation_verbose(self, mocked_print):
9190
with patch.object(sys, "argv", testargs):
9291
Phykit()
9392
assert mocked_print.mock_calls == [
94-
call("Kpol-Kpha\t0.6136\t0.6176"),
95-
call("Kpol-Snag\t0.5654\t0.7482"),
96-
call("Kpol-Suva\t0.5948\t0.6945"),
97-
call("Kpol-Skud\t0.5906\t0.7341"),
98-
call("Kpol-Smik\t0.6157\t0.7734"),
99-
call("Kpol-Scer\t0.6105\t0.7634"),
100-
call("Kpol-Kbla\t0.599\t0.7287"),
101-
call("Kpol-Kafr\t0.5696\t0.7509"),
102-
call("Kpol-Sdai\t0.5916\t0.7297"),
103-
call("Kpol-Scas\t0.5822\t0.7958"),
104-
call("Kpol-Cgla\t0.5979\t0.653"),
105-
call("Kpha-Snag\t0.5393\t0.8254"),
106-
call("Kpha-Suva\t0.5424\t0.7717"),
107-
call("Kpha-Skud\t0.5257\t0.8113"),
108-
call("Kpha-Smik\t0.5466\t0.8505"),
109-
call("Kpha-Scer\t0.5445\t0.8406"),
110-
call("Kpha-Kbla\t0.5435\t0.9228"),
111-
call("Kpha-Kafr\t0.5581\t0.945"),
112-
call("Kpha-Sdai\t0.5424\t0.9238"),
113-
call("Kpha-Scas\t0.5382\t0.9899"),
114-
call("Kpha-Cgla\t0.5738\t0.8472"),
115-
call("Snag-Suva\t0.5518\t0.7281"),
116-
call("Snag-Skud\t0.5508\t0.7678"),
117-
call("Snag-Smik\t0.5539\t0.807"),
118-
call("Snag-Scer\t0.555\t0.797"),
119-
call("Snag-Kbla\t0.5215\t1.0535"),
120-
call("Snag-Kafr\t0.5539\t1.0757"),
121-
call("Snag-Sdai\t0.5225\t1.0544"),
122-
call("Snag-Scas\t0.4984\t1.1206"),
123-
call("Snag-Cgla\t0.5131\t0.9778"),
124-
call("Suva-Skud\t0.8105\t0.229"),
125-
call("Suva-Smik\t0.7969\t0.2682"),
126-
call("Suva-Scer\t0.7958\t0.2583"),
127-
call("Suva-Kbla\t0.5361\t0.9997"),
128-
call("Suva-Kafr\t0.5162\t1.0219"),
129-
call("Suva-Sdai\t0.5309\t1.0007"),
130-
call("Suva-Scas\t0.5162\t1.0668"),
131-
call("Suva-Cgla\t0.5707\t0.9241"),
132-
call("Skud-Smik\t0.8199\t0.2171"),
133-
call("Skud-Scer\t0.8325\t0.2071"),
134-
call("Skud-Kbla\t0.5183\t1.0394"),
135-
call("Skud-Kafr\t0.5288\t1.0616"),
136-
call("Skud-Sdai\t0.5089\t1.0403"),
137-
call("Skud-Scas\t0.5005\t1.1065"),
138-
call("Skud-Cgla\t0.5602\t0.9637"),
139-
call("Smik-Scer\t0.8314\t0.1998"),
140-
call("Smik-Kbla\t0.5382\t1.0786"),
141-
call("Smik-Kafr\t0.5225\t1.1008"),
142-
call("Smik-Sdai\t0.534\t1.0796"),
143-
call("Smik-Scas\t0.5257\t1.1457"),
144-
call("Smik-Cgla\t0.5529\t1.003"),
145-
call("Scer-Kbla\t0.5372\t1.0686"),
146-
call("Scer-Kafr\t0.5162\t1.0908"),
147-
call("Scer-Sdai\t0.5298\t1.0696"),
148-
call("Scer-Scas\t0.5236\t1.1357"),
149-
call("Scer-Cgla\t0.5508\t0.993"),
150-
call("Kbla-Kafr\t0.5686\t0.67"),
151-
call("Kbla-Sdai\t0.5613\t0.8756"),
152-
call("Kbla-Scas\t0.5393\t0.9418"),
153-
call("Kbla-Cgla\t0.5508\t0.799"),
154-
call("Kafr-Sdai\t0.534\t0.8978"),
155-
call("Kafr-Scas\t0.5152\t0.964"),
156-
call("Kafr-Cgla\t0.555\t0.8212"),
157-
call("Sdai-Scas\t0.6356\t0.5357"),
158-
call("Sdai-Cgla\t0.5675\t0.7045"),
159-
call("Scas-Cgla\t0.5518\t0.7706"),
160-
]
93+
call("Kpol-Kpha\t0.3864\t0.6176"),
94+
call("Kpol-Snag\t0.4346\t0.7482"),
95+
call("Kpol-Suva\t0.4052\t0.6945"),
96+
call("Kpol-Skud\t0.4094\t0.7341"),
97+
call("Kpol-Smik\t0.3843\t0.7734"),
98+
call("Kpol-Scer\t0.3895\t0.7634"),
99+
call("Kpol-Kbla\t0.401\t0.7287"),
100+
call("Kpol-Kafr\t0.4304\t0.7509"),
101+
call("Kpol-Sdai\t0.4084\t0.7297"),
102+
call("Kpol-Scas\t0.4178\t0.7958"),
103+
call("Kpol-Cgla\t0.4021\t0.653"),
104+
call("Kpha-Snag\t0.4607\t0.8254"),
105+
call("Kpha-Suva\t0.4576\t0.7717"),
106+
call("Kpha-Skud\t0.4743\t0.8113"),
107+
call("Kpha-Smik\t0.4534\t0.8505"),
108+
call("Kpha-Scer\t0.4555\t0.8406"),
109+
call("Kpha-Kbla\t0.4565\t0.9228"),
110+
call("Kpha-Kafr\t0.4419\t0.945"),
111+
call("Kpha-Sdai\t0.4576\t0.9238"),
112+
call("Kpha-Scas\t0.4618\t0.9899"),
113+
call("Kpha-Cgla\t0.4262\t0.8472"),
114+
call("Snag-Suva\t0.4482\t0.7281"),
115+
call("Snag-Skud\t0.4492\t0.7678"),
116+
call("Snag-Smik\t0.4461\t0.807"),
117+
call("Snag-Scer\t0.445\t0.797"),
118+
call("Snag-Kbla\t0.4785\t1.0535"),
119+
call("Snag-Kafr\t0.4461\t1.0757"),
120+
call("Snag-Sdai\t0.4775\t1.0544"),
121+
call("Snag-Scas\t0.5016\t1.1206"),
122+
call("Snag-Cgla\t0.4869\t0.9778"),
123+
call("Suva-Skud\t0.1895\t0.229"),
124+
call("Suva-Smik\t0.2031\t0.2682"),
125+
call("Suva-Scer\t0.2042\t0.2583"),
126+
call("Suva-Kbla\t0.4639\t0.9997"),
127+
call("Suva-Kafr\t0.4838\t1.0219"),
128+
call("Suva-Sdai\t0.4691\t1.0007"),
129+
call("Suva-Scas\t0.4838\t1.0668"),
130+
call("Suva-Cgla\t0.4293\t0.9241"),
131+
call("Skud-Smik\t0.1801\t0.2171"),
132+
call("Skud-Scer\t0.1675\t0.2071"),
133+
call("Skud-Kbla\t0.4817\t1.0394"),
134+
call("Skud-Kafr\t0.4712\t1.0616"),
135+
call("Skud-Sdai\t0.4911\t1.0403"),
136+
call("Skud-Scas\t0.4995\t1.1065"),
137+
call("Skud-Cgla\t0.4398\t0.9637"),
138+
call("Smik-Scer\t0.1686\t0.1998"),
139+
call("Smik-Kbla\t0.4618\t1.0786"),
140+
call("Smik-Kafr\t0.4775\t1.1008"),
141+
call("Smik-Sdai\t0.466\t1.0796"),
142+
call("Smik-Scas\t0.4743\t1.1457"),
143+
call("Smik-Cgla\t0.4471\t1.003"),
144+
call("Scer-Kbla\t0.4628\t1.0686"),
145+
call("Scer-Kafr\t0.4838\t1.0908"),
146+
call("Scer-Sdai\t0.4702\t1.0696"),
147+
call("Scer-Scas\t0.4764\t1.1357"),
148+
call("Scer-Cgla\t0.4492\t0.993"),
149+
call("Kbla-Kafr\t0.4314\t0.67"),
150+
call("Kbla-Sdai\t0.4387\t0.8756"),
151+
call("Kbla-Scas\t0.4607\t0.9418"),
152+
call("Kbla-Cgla\t0.4492\t0.799"),
153+
call("Kafr-Sdai\t0.466\t0.8978"),
154+
call("Kafr-Scas\t0.4848\t0.964"),
155+
call("Kafr-Cgla\t0.445\t0.8212"),
156+
call("Sdai-Scas\t0.3644\t0.5357"),
157+
call("Sdai-Cgla\t0.4325\t0.7045"),
158+
call("Scas-Cgla\t0.4482\t0.7706"),
159+
]

0 commit comments

Comments
 (0)