Skip to content

Commit

Permalink
Parallelization of get_pairwise_alignments
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Esteban Palma Igor committed Aug 2, 2023
1 parent aa40d3f commit 1b99dd8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
30 changes: 20 additions & 10 deletions trycycler/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,20 @@
If not, see <http://www.gnu.org/licenses/>.
"""

from concurrent.futures import as_completed, ProcessPoolExecutor
import edlib
import re

from .log import log, section_header, explanation


def get_pairwise_alignments(seqs):
def align_sequences(seq_a, seq_b):
    """
    Aligns seq_a against seq_b with edlib (mode 'NW', task 'path') and summarises the result.

    Returns a tuple of three items:
      * the CIGAR string reported by edlib for the alignment,
      * the overall percent identity derived from that CIGAR,
      * the worst-1kbp identity derived from that CIGAR.

    The two identity values are produced by identity_and_worst_1kbp_from_cigar.
    """
    # Only the CIGAR is needed from edlib's result; everything else is derived from it.
    alignment_cigar = edlib.align(seq_a, seq_b, mode='NW', task='path')['cigar']
    overall_identity, worst_window_identity = \
        identity_and_worst_1kbp_from_cigar(alignment_cigar)
    return alignment_cigar, overall_identity, worst_window_identity

def get_pairwise_alignments(seqs, threads=1):
section_header('Pairwise global alignments')
explanation('Trycycler uses the edlib aligner to get global alignments between all pairs of '
'sequences. This can help you to spot any problematic sequences that should be '
Expand All @@ -28,18 +35,21 @@ def get_pairwise_alignments(seqs):
max_seq_name_len = max(len(x) for x in seq_names)
pairwise_cigars, percent_identities, worst_1kbp_identities = {}, {}, {}

for i, a in enumerate(seq_names):
seq_a = seqs[a]
for j in range(i+1, len(seq_names)):
b = seq_names[j]
seq_b = seqs[b]
with ProcessPoolExecutor(max_workers=threads) as executor:
futures = {}
for i, a in enumerate(seq_names):
seq_a = seqs[a]
for j in range(i+1, len(seq_names)):
b = seq_names[j]
seq_b = seqs[b]
future = executor.submit(align_sequences, seq_a, seq_b)
futures[future] = (a, b)
for future in as_completed(futures):
a, b = futures[future]
cigar, percent_identity, worst_1kbp = future.result()
log(' ' * (max_seq_name_len - len(a)) + a, end='')
log(' vs ', end='')
log(b + '...' + ' ' * (max_seq_name_len - len(b)), end=' ')

result = edlib.align(seq_a, seq_b, mode='NW', task='path')
cigar = result['cigar']
percent_identity, worst_1kbp = identity_and_worst_1kbp_from_cigar(cigar)
log(f'{percent_identity:.3f}% overall identity, '
f'{worst_1kbp:.1f}% worst-1kbp identity')

Expand Down
2 changes: 1 addition & 1 deletion trycycler/reconcile.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def reconcile(args):
seqs = circularise(seqs, args)
seqs, starting_seq = get_starting_seq(seqs, args.threads)
seqs = rotate_to_starting_seq(seqs, starting_seq)
pairwise_cigars, percent_identities, worst_1kbp_identities = get_pairwise_alignments(seqs)
pairwise_cigars, percent_identities, worst_1kbp_identities = get_pairwise_alignments(seqs, args.threads)
print_identity_matrix(seqs, percent_identities, args.min_identity)
print_worst_1kbp_matrix(seqs, worst_1kbp_identities, args.min_1kbp_identity)
finished_message()
Expand Down

0 comments on commit 1b99dd8

Please sign in to comment.