From 4fbd33515c7314b19176d226bf016a68651812eb Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Tue, 8 Sep 2020 00:03:06 +0100 Subject: [PATCH 1/8] License autodetection --- LICENCE.txt | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/LICENCE.txt b/LICENCE.txt index 75c43f8..39db994 100644 --- a/LICENCE.txt +++ b/LICENCE.txt @@ -1,8 +1,4 @@ - -The MIT License (MIT) -[OSI Approved License] - -The MIT License (MIT) +MIT License Copyright (c) 2020 Edinburgh Genome Foundry @@ -13,13 +9,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
From bd4729e2117c1e17f062b56f68b74003410e835b Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Tue, 8 Sep 2020 12:52:11 +0100 Subject: [PATCH 2/8] Black & formatting before Biopython v1.78 fix --- .../HomologousFragment/HomologousFragment.py | 38 ++++++++----------- .../Fragment/StickyEndFragment/StickyEnd.py | 8 ++-- .../StickyEndFragment/StickyEndFragment.py | 37 ++++++------------ dnacauldron/README.md | 2 +- dnacauldron/biotools/record_operations.py | 25 +++++------- dnacauldron/biotools/sequence_io.py | 35 +++++++---------- 6 files changed, 55 insertions(+), 90 deletions(-) diff --git a/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py b/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py index 94bdfb4..dfe3b4d 100644 --- a/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py +++ b/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py @@ -8,7 +8,7 @@ class HomologousFragment(Fragment): @staticmethod def from_biopython_record(biopython_record): - """Convert a biopython record into a HomologousFragment (class change). + """Convert a Biopython record into a HomologousFragment (class change). """ new_record = deepcopy(biopython_record) new_record.original_part = biopython_record @@ -16,12 +16,9 @@ def from_biopython_record(biopython_record): return new_record def circularized( - self, - homology_checker, - annotate_homology=False, - annotation_type="homology", + self, homology_checker, annotate_homology=False, annotation_type="homology", ): - """Return the biopython record obtained by cirularizing the result. + """Return the Biopython record obtained by cirularizing the result. Only works if the left and right sticky ends are compatible. 
The return is a simple Biopython record where the sticky end has been @@ -33,8 +30,10 @@ def circularized( annotate_homology=True, annotation_type="homology", ) + def only_parts_indicators(feature): return feature.qualifiers.get("indicates_part", False) + result = crop_record_with_saddling_features( record=double_self, start=len(self), @@ -64,13 +63,11 @@ def _push_source_features(self, homology_size, side="left"): def will_clip_in_this_order_with(self, other_fragment, homology_checker): """Return whether the fragment will assemble with anoter via homology recombination. - + homology_checker should be an HomologyChecker instance definining the homology conditions. """ - homology_size = homology_checker.find_end_homologies( - self, other_fragment - ) + homology_size = homology_checker.find_end_homologies(self, other_fragment) return homology_size > 0 def assemble_with( @@ -82,16 +79,16 @@ def assemble_with( ): """Return the fragment resulting from the assembly of this fragment with another, in that order. - + Parameters ---------- - + fragment - The other parameter to assemble with - + The other parameter to assemble with. + homology_checker An HomologyChecker instance definining the homology conditions. - + annotate_homology If true, homologies will have an annotation in the final, predicted construct records. @@ -130,10 +127,7 @@ def only_parts_indicators(feature): @staticmethod def assemble( - fragments, - homology_checker, - circularize=False, - annotate_homologies=False, + fragments, homology_checker, circularize=False, annotate_homologies=False, ): """Return the record obtained by assembling the fragments. @@ -141,19 +135,17 @@ def assemble( ---------- fragments - List of HomologousFragments to assemble + List of HomologousFragments to assemble. homology_checker An HomologyChecker instance definining the homology conditions. - + circularize True to also assemble the end flanks of the final construct. 
annotate_homologies If true, homologies will have an annotation in the final, predicted construct records. - - """ result = fragments[0] for fragment in fragments[1:]: diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py b/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py index f34f9c5..cf0ab15 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py @@ -1,6 +1,7 @@ from Bio.Seq import Seq from ...biotools import sequence_to_biopython_record, annotate_record + class StickyEnd(Seq): """A class to represent the sticky end of a sequence. @@ -10,10 +11,10 @@ class StickyEnd(Seq): ---------- data - A DNA sequence in ATGC format + A DNA sequence in ATGC format. strand - The strand (+1 or -1) on which the protusion is + The strand (+1 or -1) on which the protusion is. **k Optional keyword arguments for the sequence, such as ``alphabet`` etc. @@ -40,10 +41,9 @@ def will_clip_directly_with(self, other): and (self.strand == -other.strand) and (str(self) == str(other)) ) - + def as_biopython_record(self): record = sequence_to_biopython_record(str(self)) sign = "+" if self.strand == 1 else "-" annotate_record(record, label="(%s) strand" % sign) return record - diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py b/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py index 18b33b1..69c695f 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py @@ -6,7 +6,7 @@ set_record_topology, crop_record_with_saddling_features, sequence_to_biopython_record, - annotate_record + annotate_record, ) from ..Fragment import Fragment from .StickyEnd import StickyEnd @@ -17,7 +17,7 @@ class StickyEndFragment(Fragment): """Biopython SeqRecord whose sequence has sticky ends.""" def will_clip_in_this_order_with(self, other): - """Return True iff this record's right sticky end is complementary with + """Return True 
if this record's right sticky end is complementary with the other record's left sticky end.""" right_end = self.seq.right_end return (right_end is not None) and right_end.will_clip_directly_with( @@ -25,12 +25,9 @@ def will_clip_in_this_order_with(self, other): ) def circularized( - self, - annotate_homology=False, - annotation_type="homology", - qualifiers=None, + self, annotate_homology=False, annotation_type="homology", qualifiers=None, ): - """Return the biopython record obtained by cirularizing the result. + """Return the Biopython record obtained by cirularizing the result. Only works if the left and right sticky ends are compatible. The return is a simple Biopython record where the sticky end has been @@ -38,8 +35,7 @@ def circularized( """ if not self.will_clip_in_this_order_with(self): raise ValueError( - "Only constructs with two compatible sticky ends" - " can be circularized" + "Only constructs with two compatible sticky ends" " can be circularized" ) connector = SeqRecord(Seq(str(self.seq.left_end))) if annotate_homology: @@ -57,10 +53,7 @@ def annotate_connector(self, connector, annotation_type="homology"): else: label = str(connector.seq) feature = self.create_homology_annotation( - start=0, - end=len(connector), - annotation_type=annotation_type, - label=label, + start=0, end=len(connector), annotation_type=annotation_type, label=label, ) connector.features = [feature] @@ -72,8 +65,8 @@ def assemble(fragments, circularize=False, annotate_homologies=False): ---------- fragments - List of StickyEndFragments to assemble - + List of StickyEndFragments to assemble. + circularize True to also assemble the end flanks of the final construct (results in a Biopython Record), false to not do it (the result is then a @@ -82,8 +75,6 @@ def assemble(fragments, circularize=False, annotate_homologies=False): annotate_homologies If true, homologies will have an annotation in the final, predicted construct records. 
- - """ result = fragments[0] for fragment in fragments[1:]: @@ -95,9 +86,7 @@ def assemble(fragments, circularize=False, annotate_homologies=False): result.seq.alphabet = DNAAlphabet() return result - def assemble_with( - self, other, annotate_homology=False, annotation_type="homology" - ): + def assemble_with(self, other, annotate_homology=False, annotation_type="homology"): connector_str = str(self.seq.right_end) connector = SeqRecord(Seq(connector_str)) if annotate_homology: @@ -118,17 +107,13 @@ def list_from_record_digestion(record, enzyme, linear="auto"): if linear == "auto": linear = record.annotations.get("topology", "linear") == "linear" if isinstance(enzyme, (list, tuple)): - n_cuts = sum( - [len(e.search(record.seq, linear=linear)) for e in enzyme] - ) + n_cuts = sum([len(e.search(record.seq, linear=linear)) for e in enzyme]) else: n_cuts = len(enzyme.search(record.seq, linear=linear)) if n_cuts == 0: return [record] if not linear: - record.features = [ - f for f in record.features if f.location is not None - ] + record.features = [f for f in record.features if f.location is not None] record_fragments = StickyEndFragment.list_from_record_digestion( record + record, enzyme=enzyme, linear=True ) diff --git a/dnacauldron/README.md b/dnacauldron/README.md index c4e58d3..f51752a 100644 --- a/dnacauldron/README.md +++ b/dnacauldron/README.md @@ -26,7 +26,7 @@ An ***AssemblyMix*** contains a list of ***Fragment*** instances (which subclass - A ***StickyEndFragmentMix***: - Such mixes contain a list of ***StickyEndFragment*** instances. - A ***StickyEndFragment*** has a ***StickyEndSeq*** (subclass of BioPython's *Seq* with additional ***StickyEnd*** sequences on the left and right). - - Usable Subclasses include ***RestrictionLigationMix***, ***Type2sRestrictionMix***, ***BASICAssemblyMix***. + - Usable Subclasses include ***RestrictionLigationMix***, ***Type2sRestrictionMix***, ***BASICAssemblyMix***. 
StickyEndFragments are generated by restriction mixes, for example by RestrictionLigationMix. - An ***HomologousFragmentMix***: - Such mixes contain a list of ***HomologousFragment*** instances. - Mixes also require a ***HomologyChecker*** to detect homologies. diff --git a/dnacauldron/biotools/record_operations.py b/dnacauldron/biotools/record_operations.py index ecccce7..fcdc1dc 100644 --- a/dnacauldron/biotools/record_operations.py +++ b/dnacauldron/biotools/record_operations.py @@ -1,10 +1,10 @@ - from copy import copy from Bio.Alphabet import DNAAlphabet from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.SeqFeature import SeqFeature, FeatureLocation + def complement(dna_sequence): """Return the complement of the DNA sequence. @@ -14,6 +14,7 @@ def complement(dna_sequence): """ return str(Seq(dna_sequence).complement()) + def set_record_topology(record, topology): """Set the Biopython record's topology, possibly passing if already set. @@ -29,9 +30,7 @@ def set_record_topology(record, topology): "default_to_linear", ] if topology not in valid_topologies: - raise ValueError( - "topology should be one of %s." % ", ".join(valid_topologies) - ) + raise ValueError("topology should be one of %s." % ", ".join(valid_topologies)) annotations = record.annotations default_prefix = "default_to_" if topology.startswith(default_prefix): @@ -71,11 +70,7 @@ def sequence_to_biopython_record( def annotate_record( - seqrecord, - location="full", - feature_type="misc_feature", - margin=0, - **qualifiers + seqrecord, location="full", feature_type="misc_feature", margin=0, **qualifiers ): """Add a feature to a Biopython SeqRecord. @@ -83,19 +78,19 @@ def annotate_record( ---------- seqrecord - The biopython seqrecord to be annotated. + The Biopython seqrecord to be annotated. location Either (start, end) or (start, end, strand). (strand defaults to +1) feature_type - The type associated with the feature + The type associated with the feature. 
margin Number of extra bases added on each side of the given location. qualifiers - Dictionnary that will be the Biopython feature's `qualifiers` attribute. + Dictionary that will be the Biopython feature's `qualifiers` attribute. """ if location == "full": location = (margin, len(seqrecord) - margin) @@ -111,7 +106,7 @@ def annotate_record( def crop_record_with_saddling_features(record, start, end, filters=()): - """Crop the biopython record, but keep features that are only partially in. + """Crop the Biopython record, but keep features that are only partially in. Parameters ---------- @@ -119,11 +114,11 @@ def crop_record_with_saddling_features(record, start, end, filters=()): The Biopython record to crop. start, end - Coordinates of the segment to crop + Coordinates of the segment to crop. filters list of functions (feature=>True/False). Any feature that doesn't pass - at least one filter will be filtered out. + at least one filter will be filtered out. """ cropped = record[start:end] diff --git a/dnacauldron/biotools/sequence_io.py b/dnacauldron/biotools/sequence_io.py index 4c7e6c9..20b8323 100644 --- a/dnacauldron/biotools/sequence_io.py +++ b/dnacauldron/biotools/sequence_io.py @@ -45,7 +45,7 @@ def load_record( max_name_length=20, ): """Return a Biopython record read from a Fasta/Genbank/Snapgene file. - + Parameters ---------- @@ -56,16 +56,16 @@ def load_record( Can be "circular", "linear", "default_to_circular" (will default to circular if ``annotations['topology']`` is not already set) or "default_to_linear". - + id Sets the record.id. If "auto", the original record.id is used, and if none is set the name of the file (without extension) is used instead. - + upperize If true, the sequence will get upperized (recommended in this library, as the mix of upper and lower case can cause problems in Biopython's - enzyme sites search) - + enzyme site search). 
+ max_name_length The name of the record will be truncated if too long to avoid Biopython exceptions being raised. @@ -95,7 +95,7 @@ def load_record( def _load_records_from_zip_file(zip_file, use_file_names_as_ids=False): """Return all fasta/genbank/snapgene in a zip as biopython records. - + Each record gets a ``source_file`` attribute from the zip's file name without the .zip extension. @@ -113,14 +113,10 @@ def _load_records_from_zip_file(zip_file, use_file_names_as_ids=False): except Exception: content_stream = BytesIO(f.read("rb")) try: - record = snapgene_file_to_seqrecord( - fileobject=content_stream - ) + record = snapgene_file_to_seqrecord(fileobject=content_stream) new_records, _ = [record], "snapgene" except Exception: - raise ValueError( - "Format not recognized for file " + f._path - ) + raise ValueError("Format not recognized for file " + f._path) single_record = len(new_records) == 1 for i, record in enumerate(new_records): @@ -168,24 +164,21 @@ def load_records_from_file(filepath): return records, fmt -def load_records_from_files( - files=None, folder=None, use_file_names_as_ids=False -): +def load_records_from_files(files=None, folder=None, use_file_names_as_ids=False): """Automatically convert files or a folder's content to biopython records. Parameters ---------- files - A list of path to files. A ``folder`` can be provided instead - + A list of path to files. A ``folder`` can be provided instead. + folder A path to a folder containing sequence files. - + use_file_names_as_ids If True, for every file containing a single record, the file name - (without extension) will be set as the record's ID. - + (without extension) will be set as the record's ID. """ if files is not None: for file in files: @@ -235,7 +228,7 @@ def load_records_from_files( def write_record(record, target, fmt="genbank"): - """Write a record as genbank, fasta, etc. via Biopython, with fixes""" + """Write a record as genbank, fasta, etc. 
via Biopython, with fixes.""" record = deepcopy(record) record.id = record.id[:20] if str(record.seq.alphabet.__class__.__name__) != "DNAAlphabet": From df0369891c6ffe97cc646a413acb8560a9c2769c Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Tue, 8 Sep 2020 13:57:06 +0100 Subject: [PATCH 3/8] Biopython v1.78 fix --- .../HomologousFragment/HomologousFragment.py | 17 +++- .../Fragment/StickyEndFragment/StickyEnd.py | 23 +++-- .../StickyEndFragment/StickyEndFragment.py | 22 ++++- .../StickyEndFragment/StickyEndSeq.py | 89 ++++++++++--------- dnacauldron/biotools/record_operations.py | 35 ++++++-- dnacauldron/biotools/sequence_io.py | 24 ++++- 6 files changed, 146 insertions(+), 64 deletions(-) diff --git a/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py b/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py index dfe3b4d..7a117e8 100644 --- a/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py +++ b/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py @@ -1,7 +1,16 @@ from copy import deepcopy from Bio.SeqRecord import SeqRecord from ..Fragment import Fragment -from Bio.Alphabet import DNAAlphabet + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False + from ...biotools import set_record_topology, crop_record_with_saddling_features @@ -159,5 +168,9 @@ def assemble( annotate_homology=annotate_homologies, homology_checker=homology_checker, ) - result.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + result.seq.alphabet = DNAAlphabet() + result.annotations["molecule_type"] = "DNA" + return result diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py b/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py index cf0ab15..cd36134 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py @@ -1,4 +1,13 @@ from Bio.Seq import Seq + 
+try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from ...biotools import sequence_to_biopython_record, annotate_record @@ -25,11 +34,15 @@ def __init__(self, data, strand, **k): self.strand = strand def reverse_complement(self): - return StickyEnd( - str(Seq.reverse_complement(self)), - strand=-self.strand, - alphabet=self.alphabet, - ) + + if has_dna_alphabet: # Biopython <1.78 + return StickyEnd( + str(Seq.reverse_complement(self)), + strand=-self.strand, + alphabet=self.alphabet, + ) + else: + return StickyEnd(str(Seq.reverse_complement(self)), strand=-self.strand,) def __repr__(self): return "%s(%s)" % (Seq.__str__(self), {1: "+", -1: "-"}[self.strand]) diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py b/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py index 69c695f..44e96a7 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py @@ -1,6 +1,14 @@ from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord -from Bio.Alphabet import DNAAlphabet + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from ...biotools import ( set_record_topology, @@ -83,7 +91,11 @@ def assemble(fragments, circularize=False, annotate_homologies=False): ) if circularize: result = result.circularized(annotate_homology=annotate_homologies) - result.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + result.seq.alphabet = DNAAlphabet() + result.annotations["molecule_type"] = "DNA" + return result def assemble_with(self, other, annotate_homology=False, annotation_type="homology"): @@ -99,7 +111,11 @@ def assemble_with(self, other, annotate_homology=False, annotation_type="homolog new_record = SeqRecord.__add__(selfc, 
connector).__add__(other) new_record.seq = self.seq + other.seq new_record.__class__ = StickyEndFragment - new_record.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + new_record.seq.alphabet = DNAAlphabet() + new_record.annotations["molecule_type"] = "DNA" + return new_record @staticmethod diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py b/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py index bfc2341..b1bc34c 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py @@ -1,4 +1,13 @@ from Bio.Seq import Seq + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from ...biotools import set_record_topology from .StickyEnd import StickyEnd @@ -21,29 +30,42 @@ def reverse_complement(self): left-right versions are interchanged and reverse complemented. """ - return StickyEndSeq( - str(Seq.reverse_complement(self)), - left_end=None - if self.right_end is None - else self.right_end.reverse_complement(), - right_end=None - if self.left_end is None - else self.left_end.reverse_complement(), - alphabet=self.alphabet, - ) + + if has_dna_alphabet: # Biopython <1.78 + sticky_end_seq = StickyEndSeq( + str(Seq.reverse_complement(self)), + left_end=None + if self.right_end is None + else self.right_end.reverse_complement(), + right_end=None + if self.left_end is None + else self.left_end.reverse_complement(), + alphabet=self.alphabet, + ) + else: + sticky_end_seq = StickyEndSeq( + str(Seq.reverse_complement(self)), + left_end=None + if self.right_end is None + else self.right_end.reverse_complement(), + right_end=None + if self.left_end is None + else self.left_end.reverse_complement(), + ) + + return sticky_end_seq def will_clip_in_this_order_with(self, other): """Return whether this sequence will clip in this order with another. 
""" - return ( - self.right_end is not None - ) and self.right_end.will_clip_directly_with(other.left_end) + return (self.right_end is not None) and self.right_end.will_clip_directly_with( + other.left_end + ) def circularized(self): if not self.will_clip_in_this_order_with(self): raise ValueError( - "Only constructs with two compatible sticky ends" - " can be circularized" + "Only constructs with two compatible sticky ends" " can be circularized" ) result = Seq(str(self.left_end)) + self set_record_topology(result, "circular") @@ -53,15 +75,9 @@ def __repr__(self): content = Seq.__str__(self) if len(content) > 15: content = ( - content[:5].lower() - + ("(%d)" % len(content)) - + content[-5:].lower() + content[:5].lower() + ("(%d)" % len(content)) + content[-5:].lower() ) - return "(%s-%s-%s)" % ( - repr(self.left_end), - content, - repr(self.right_end), - ) + return "(%s-%s-%s)" % (repr(self.left_end), content, repr(self.right_end),) def __add__(self, other): assert self.will_clip_in_this_order_with(other) @@ -134,12 +150,9 @@ def list_from_sequence_digestion(sequence, enzyme, linear=True): sticky_fragments = [StickyEndSeq(fragments[0])] for f in fragments[1:]: overhang_bit, new_fragment_seq = f[:overhang], f[overhang:] - sticky_fragments[-1].right_end = StickyEnd( - overhang_bit, right_end_sign - ) + sticky_fragments[-1].right_end = StickyEnd(overhang_bit, right_end_sign) new_fragment = StickyEndSeq( - new_fragment_seq, - left_end=StickyEnd(overhang_bit, -right_end_sign), + new_fragment_seq, left_end=StickyEnd(overhang_bit, -right_end_sign), ) sticky_fragments.append(new_fragment) if not linear: @@ -163,26 +176,16 @@ def list_from_sequence_digestion(sequence, enzyme, linear=True): sticky_fragments[0].left_end = first_left_end sticky_fragments = [ StickyEndSeq( - new_fragment_seq, - left_end=left_end, - right_end=last_right_end, + new_fragment_seq, left_end=left_end, right_end=last_right_end, ) ] else: - sticky_fragments.append( - StickyEndSeq(fragments[-1], 
left_end=left_end) - ) - if ( - hasattr(sequence, "left_end") - and sticky_fragments[0].left_end is None - ): + sticky_fragments.append(StickyEndSeq(fragments[-1], left_end=left_end)) + if hasattr(sequence, "left_end") and sticky_fragments[0].left_end is None: sticky_fragments[0].left_end = sequence.left_end - if ( - hasattr(sequence, "right_end") - and sticky_fragments[-1].right_end is None - ): + if hasattr(sequence, "right_end") and sticky_fragments[-1].right_end is None: sticky_fragments[-1].right_end = sequence.right_end return sticky_fragments - + def ends_tuple(self): return (str(self.left_end), str(self.right_end)) diff --git a/dnacauldron/biotools/record_operations.py b/dnacauldron/biotools/record_operations.py index fcdc1dc..2c1fe40 100644 --- a/dnacauldron/biotools/record_operations.py +++ b/dnacauldron/biotools/record_operations.py @@ -1,5 +1,13 @@ from copy import copy -from Bio.Alphabet import DNAAlphabet + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.SeqFeature import SeqFeature, FeatureLocation @@ -61,12 +69,25 @@ def sequence_to_biopython_record( sequence, id="", name="same_as_id", features=() ): """Return a SeqRecord of the sequence, ready to be Genbanked.""" - return SeqRecord( - Seq(sequence, alphabet=DNAAlphabet()), - id=id, - name=id if name == "same_as_id" else name, - features=list(features), - ) + + if has_dna_alphabet: + seqrecord = SeqRecord( + Seq(sequence, alphabet=DNAAlphabet()), + id=id, + name=id if name == "same_as_id" else name, + features=list(features), + ) + else: + seqrecord = SeqRecord( + Seq(sequence), + id=id, + name=id if name == "same_as_id" else name, + features=list(features), + ) + + seqrecord.annotations["molecule_type"] = "DNA" + + return seqrecord def annotate_record( diff --git a/dnacauldron/biotools/sequence_io.py 
b/dnacauldron/biotools/sequence_io.py index 20b8323..4ade0a5 100644 --- a/dnacauldron/biotools/sequence_io.py +++ b/dnacauldron/biotools/sequence_io.py @@ -5,7 +5,15 @@ import flametree from snapgene_reader import snapgene_file_to_seqrecord from Bio import SeqIO -from Bio.Alphabet import DNAAlphabet + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from .record_operations import ( set_record_topology, sequence_to_biopython_record, @@ -210,7 +218,11 @@ def load_records_from_files(files=None, folder=None, use_file_names_as_ids=False "", "Exported", ] - record.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + record.seq.alphabet = DNAAlphabet() + record.annotations["molecule_type"] = "DNA" + # Sorry for this parts, it took a lot of "whatever works". # keep your part names under 20c and pointless, and everything # will be good @@ -231,8 +243,12 @@ def write_record(record, target, fmt="genbank"): """Write a record as genbank, fasta, etc. 
via Biopython, with fixes.""" record = deepcopy(record) record.id = record.id[:20] - if str(record.seq.alphabet.__class__.__name__) != "DNAAlphabet": - record.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + if str(record.seq.alphabet.__class__.__name__) != "DNAAlphabet": + record.seq.alphabet = DNAAlphabet() + record.annotations["molecule_type"] = "DNA" + if hasattr(target, "open"): target = target.open("w") SeqIO.write(record, target, fmt) From 7f724b085592bf2b33809e6cb0b86ae9e673106a Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Tue, 8 Sep 2020 14:03:22 +0100 Subject: [PATCH 4/8] Fix typo in method name --- dnacauldron/Fragment/Fragment.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/dnacauldron/Fragment/Fragment.py b/dnacauldron/Fragment/Fragment.py index 6a048cd..b1815b0 100644 --- a/dnacauldron/Fragment/Fragment.py +++ b/dnacauldron/Fragment/Fragment.py @@ -19,7 +19,7 @@ def from_biopython_record(biopython_record): def plot(self, ax=None): """Plot the fragment and its features on a Matplotlib ax. - + This creates a new ax if no ax is provided. The ax is returned at the end. """ @@ -37,9 +37,9 @@ def reverse_complement(self): def to_standard_string(self): """Return a standard string to represent and identify the fragment. - + This method is used to standardize and recognize similar FragmentChain - instances. + instances. 
""" return str(self.seq) @@ -53,13 +53,11 @@ def create_homology_annotation( "ApEinfo_fwdcolor": color, } return SeqFeature( - FeatureLocation(start, end), - type=annotation_type, - qualifiers=qualifiers, + FeatureLocation(start, end), type=annotation_type, qualifiers=qualifiers, ) - + def text_representation_in_plots(self): return r"$\bf{%s}$" % self.original_part.id - - def as_bioptyhon_record(self): + + def as_biopython_record(self): return self From 44c6fda31640c255907bb51c424c3ef168071382 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Tue, 8 Sep 2020 16:54:01 +0100 Subject: [PATCH 5/8] Remove StickyEndSeq.circularized() method The method returns an error, because StickyEndSeq has no `annotations` that can be set. All circularized() calls in the codebase are made from StickyEndFragment or HomologousFragment. --- dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py b/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py index b1bc34c..52d0f9c 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py @@ -8,7 +8,6 @@ except ImportError: # Biopython >=1.78 has_dna_alphabet = False -from ...biotools import set_record_topology from .StickyEnd import StickyEnd @@ -62,15 +61,6 @@ def will_clip_in_this_order_with(self, other): other.left_end ) - def circularized(self): - if not self.will_clip_in_this_order_with(self): - raise ValueError( - "Only constructs with two compatible sticky ends" " can be circularized" - ) - result = Seq(str(self.left_end)) + self - set_record_topology(result, "circular") - return result - def __repr__(self): content = Seq.__str__(self) if len(content) > 15: From 73222e8c0b4a80b9d0df784ee3df2ed90eb114fa Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Tue, 8 Sep 2020 16:54:14 +0100 Subject: [PATCH 6/8] Added tests for Sticky classes --- 
tests/test_fragments/test_fragments.py | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/test_fragments/test_fragments.py diff --git a/tests/test_fragments/test_fragments.py b/tests/test_fragments/test_fragments.py new file mode 100644 index 0000000..eb643fa --- /dev/null +++ b/tests/test_fragments/test_fragments.py @@ -0,0 +1,42 @@ +import pytest +from Bio.Seq import Seq +from dnacauldron.Fragment import ( + StickyEnd, + StickyEndSeq, + StickyEndFragment, +) + + +def test_StickyEnd(): + sticky_end = StickyEnd(Seq("ATGC"), strand=1) + assert sticky_end.__repr__() == "ATGC(+)" + + +def test_StickyEndSeq(): + sticky = StickyEndSeq( + Seq("AAA"), + left_end=StickyEnd("ATCG", strand=+1), + # RC of left end so that it self-anneals: + right_end=StickyEnd("ATCG", strand=-1), + ) + assert sticky.__repr__() == "(ATCG(+)-AAA-ATCG(-))" + + # Longer than 15 bp: + sticky = StickyEndSeq( + Seq("AAAATTTTCCCCGGGG"), + left_end=StickyEnd("ATCG", strand=+1), + right_end=StickyEnd("ATCG", strand=-1), + ) + assert sticky.__repr__() == "(ATCG(+)-aaaat(16)cgggg-ATCG(-))" + + +def test_StickyEndSeqFragment(): + sticky = StickyEndSeq( + Seq("TTT"), + left_end=StickyEnd("AAAA", strand=+1), + # Incompatible overhang: + right_end=StickyEnd("ATCG", strand=-1), + ) + sticky_fragment = StickyEndFragment(sticky) + with pytest.raises(ValueError): + sticky_fragment.circularized() From 2c805cf555061e3d4f1a5e9d7ba9b0f42027db6d Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Tue, 8 Sep 2020 18:06:46 +0100 Subject: [PATCH 7/8] v2.0.2 The removed method StickyEndSeq.circularized() did not work; therefore the API is not considered to have changed. 
--- dnacauldron/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dnacauldron/version.py b/dnacauldron/version.py index 159d48b..0309ae2 100644 --- a/dnacauldron/version.py +++ b/dnacauldron/version.py @@ -1 +1 @@ -__version__ = "2.0.1" +__version__ = "2.0.2" From b61b7e78de673616c31bdce0e3b1eb9ae099f6a1 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Tue, 8 Sep 2020 18:10:26 +0100 Subject: [PATCH 8/8] PyPI readme update --- pypi-readme.rst | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pypi-readme.rst b/pypi-readme.rst index 85936fd..0d63cf7 100644 --- a/pypi-readme.rst +++ b/pypi-readme.rst @@ -3,18 +3,13 @@ DNA Cauldron DNA Cauldron provides a generic cloning simulation framework to predict constructs sequences and detect assembly flaws. -It can simulate Golden-Gate support, with extra features like enzyme or +It can simulate Golden-Gate, with extra features like enzyme or connector parts autoselection, and other methods like Gibson Assembly, LCR Assembly, BASIC assembly, BioBrick assembly. It supports single and combinatorial assemblies, hierarchical assemblies, and produces nice reports for traceability and troubleshooting. -.. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/DnaCauldron/master/docs/_static/images/report_elements.png - :alt: [logo] - :align: center - :width: 800px - Infos ----- @@ -28,11 +23,11 @@ Infos ``_ -**Github Page** +**Github Page:** ``_ -**Live demo** +**Live demo:** ``_