diff --git a/LICENCE.txt b/LICENCE.txt index 75c43f8..39db994 100644 --- a/LICENCE.txt +++ b/LICENCE.txt @@ -1,8 +1,4 @@ - -The MIT License (MIT) -[OSI Approved License] - -The MIT License (MIT) +MIT License Copyright (c) 2020 Edinburgh Genome Foundry @@ -13,13 +9,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/dnacauldron/Fragment/Fragment.py b/dnacauldron/Fragment/Fragment.py index 6a048cd..b1815b0 100644 --- a/dnacauldron/Fragment/Fragment.py +++ b/dnacauldron/Fragment/Fragment.py @@ -19,7 +19,7 @@ def from_biopython_record(biopython_record): def plot(self, ax=None): """Plot the fragment and its features on a Matplotlib ax. - + This creates a new ax if no ax is provided. The ax is returned at the end. """ @@ -37,9 +37,9 @@ def reverse_complement(self): def to_standard_string(self): """Return a standard string to represent and identify the fragment. - + This method is used to standardize and recognize similar FragmentChain - instances. + instances. """ return str(self.seq) @@ -53,13 +53,11 @@ def create_homology_annotation( "ApEinfo_fwdcolor": color, } return SeqFeature( - FeatureLocation(start, end), - type=annotation_type, - qualifiers=qualifiers, + FeatureLocation(start, end), type=annotation_type, qualifiers=qualifiers, ) - + def text_representation_in_plots(self): return r"$\bf{%s}$" % self.original_part.id - - def as_bioptyhon_record(self): + + def as_biopython_record(self): return self diff --git a/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py b/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py index 94bdfb4..7a117e8 100644 --- a/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py +++ b/dnacauldron/Fragment/HomologousFragment/HomologousFragment.py @@ -1,14 +1,23 @@ from copy import deepcopy from Bio.SeqRecord import SeqRecord from ..Fragment import Fragment -from Bio.Alphabet import DNAAlphabet + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False + from ...biotools import set_record_topology, crop_record_with_saddling_features class HomologousFragment(Fragment): @staticmethod def from_biopython_record(biopython_record): - """Convert a biopython record into a HomologousFragment (class change). + """Convert a Biopython record into a HomologousFragment (class change). """ new_record = deepcopy(biopython_record) new_record.original_part = biopython_record @@ -16,12 +25,9 @@ def from_biopython_record(biopython_record): return new_record def circularized( - self, - homology_checker, - annotate_homology=False, - annotation_type="homology", + self, homology_checker, annotate_homology=False, annotation_type="homology", ): - """Return the biopython record obtained by cirularizing the result. + """Return the Biopython record obtained by cirularizing the result. Only works if the left and right sticky ends are compatible. The return is a simple Biopython record where the sticky end has been @@ -33,8 +39,10 @@ def circularized( annotate_homology=True, annotation_type="homology", ) + def only_parts_indicators(feature): return feature.qualifiers.get("indicates_part", False) + result = crop_record_with_saddling_features( record=double_self, start=len(self), @@ -64,13 +72,11 @@ def _push_source_features(self, homology_size, side="left"): def will_clip_in_this_order_with(self, other_fragment, homology_checker): """Return whether the fragment will assemble with anoter via homology recombination. - + homology_checker should be an HomologyChecker instance definining the homology conditions. """ - homology_size = homology_checker.find_end_homologies( - self, other_fragment - ) + homology_size = homology_checker.find_end_homologies(self, other_fragment) return homology_size > 0 def assemble_with( @@ -82,16 +88,16 @@ def assemble_with( ): """Return the fragment resulting from the assembly of this fragment with another, in that order. - + Parameters ---------- - + fragment - The other parameter to assemble with - + The other parameter to assemble with. + homology_checker An HomologyChecker instance definining the homology conditions. - + annotate_homology If true, homologies will have an annotation in the final, predicted construct records. @@ -130,10 +136,7 @@ def only_parts_indicators(feature): @staticmethod def assemble( - fragments, - homology_checker, - circularize=False, - annotate_homologies=False, + fragments, homology_checker, circularize=False, annotate_homologies=False, ): """Return the record obtained by assembling the fragments. @@ -141,19 +144,17 @@ def assemble( ---------- fragments - List of HomologousFragments to assemble + List of HomologousFragments to assemble. homology_checker An HomologyChecker instance definining the homology conditions. - + circularize True to also assemble the end flanks of the final construct. annotate_homologies If true, homologies will have an annotation in the final, predicted construct records. - - """ result = fragments[0] for fragment in fragments[1:]: @@ -167,5 +168,9 @@ def assemble( annotate_homology=annotate_homologies, homology_checker=homology_checker, ) - result.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + result.seq.alphabet = DNAAlphabet() + result.annotations["molecule_type"] = "DNA" + return result diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py b/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py index f34f9c5..cd36134 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEnd.py @@ -1,6 +1,16 @@ from Bio.Seq import Seq + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from ...biotools import sequence_to_biopython_record, annotate_record + class StickyEnd(Seq): """A class to represent the sticky end of a sequence. @@ -10,10 +20,10 @@ class StickyEnd(Seq): ---------- data - A DNA sequence in ATGC format + A DNA sequence in ATGC format. strand - The strand (+1 or -1) on which the protusion is + The strand (+1 or -1) on which the protusion is. **k Optional keyword arguments for the sequence, such as ``alphabet`` etc. @@ -24,11 +34,15 @@ def __init__(self, data, strand, **k): self.strand = strand def reverse_complement(self): - return StickyEnd( - str(Seq.reverse_complement(self)), - strand=-self.strand, - alphabet=self.alphabet, - ) + + if has_dna_alphabet: # Biopython <1.78 + return StickyEnd( + str(Seq.reverse_complement(self)), + strand=-self.strand, + alphabet=self.alphabet, + ) + else: + return StickyEnd(str(Seq.reverse_complement(self)), strand=-self.strand,) def __repr__(self): return "%s(%s)" % (Seq.__str__(self), {1: "+", -1: "-"}[self.strand]) @@ -40,10 +54,9 @@ def will_clip_directly_with(self, other): and (self.strand == -other.strand) and (str(self) == str(other)) ) - + def as_biopython_record(self): record = sequence_to_biopython_record(str(self)) sign = "+" if self.strand == 1 else "-" annotate_record(record, label="(%s) strand" % sign) return record - diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py b/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py index 18b33b1..44e96a7 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEndFragment.py @@ -1,12 +1,20 @@ from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord -from Bio.Alphabet import DNAAlphabet + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from ...biotools import ( set_record_topology, crop_record_with_saddling_features, sequence_to_biopython_record, - annotate_record + annotate_record, ) from ..Fragment import Fragment from .StickyEnd import StickyEnd @@ -17,7 +25,7 @@ class StickyEndFragment(Fragment): """Biopython SeqRecord whose sequence has sticky ends.""" def will_clip_in_this_order_with(self, other): - """Return True iff this record's right sticky end is complementary with + """Return True if this record's right sticky end is complementary with the other record's left sticky end.""" right_end = self.seq.right_end return (right_end is not None) and right_end.will_clip_directly_with( @@ -25,12 +33,9 @@ def will_clip_in_this_order_with(self, other): ) def circularized( - self, - annotate_homology=False, - annotation_type="homology", - qualifiers=None, + self, annotate_homology=False, annotation_type="homology", qualifiers=None, ): - """Return the biopython record obtained by cirularizing the result. + """Return the Biopython record obtained by cirularizing the result. Only works if the left and right sticky ends are compatible. The return is a simple Biopython record where the sticky end has been @@ -38,8 +43,7 @@ def circularized( """ if not self.will_clip_in_this_order_with(self): raise ValueError( - "Only constructs with two compatible sticky ends" - " can be circularized" + "Only constructs with two compatible sticky ends" " can be circularized" ) connector = SeqRecord(Seq(str(self.seq.left_end))) if annotate_homology: @@ -57,10 +61,7 @@ def annotate_connector(self, connector, annotation_type="homology"): else: label = str(connector.seq) feature = self.create_homology_annotation( - start=0, - end=len(connector), - annotation_type=annotation_type, - label=label, + start=0, end=len(connector), annotation_type=annotation_type, label=label, ) connector.features = [feature] @@ -72,8 +73,8 @@ def assemble(fragments, circularize=False, annotate_homologies=False): ---------- fragments - List of StickyEndFragments to assemble - + List of StickyEndFragments to assemble. + circularize True to also assemble the end flanks of the final construct (results in a Biopython Record), false to not do it (the result is then a @@ -82,8 +83,6 @@ def assemble(fragments, circularize=False, annotate_homologies=False): annotate_homologies If true, homologies will have an annotation in the final, predicted construct records. - - """ result = fragments[0] for fragment in fragments[1:]: @@ -92,12 +91,14 @@ def assemble(fragments, circularize=False, annotate_homologies=False): ) if circularize: result = result.circularized(annotate_homology=annotate_homologies) - result.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + result.seq.alphabet = DNAAlphabet() + result.annotations["molecule_type"] = "DNA" + return result - def assemble_with( - self, other, annotate_homology=False, annotation_type="homology" - ): + def assemble_with(self, other, annotate_homology=False, annotation_type="homology"): connector_str = str(self.seq.right_end) connector = SeqRecord(Seq(connector_str)) if annotate_homology: @@ -110,7 +111,11 @@ def assemble_with( new_record = SeqRecord.__add__(selfc, connector).__add__(other) new_record.seq = self.seq + other.seq new_record.__class__ = StickyEndFragment - new_record.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + new_record.seq.alphabet = DNAAlphabet() + new_record.annotations["molecule_type"] = "DNA" + return new_record @staticmethod @@ -118,17 +123,13 @@ def list_from_record_digestion(record, enzyme, linear="auto"): if linear == "auto": linear = record.annotations.get("topology", "linear") == "linear" if isinstance(enzyme, (list, tuple)): - n_cuts = sum( - [len(e.search(record.seq, linear=linear)) for e in enzyme] - ) + n_cuts = sum([len(e.search(record.seq, linear=linear)) for e in enzyme]) else: n_cuts = len(enzyme.search(record.seq, linear=linear)) if n_cuts == 0: return [record] if not linear: - record.features = [ - f for f in record.features if f.location is not None - ] + record.features = [f for f in record.features if f.location is not None] record_fragments = StickyEndFragment.list_from_record_digestion( record + record, enzyme=enzyme, linear=True ) diff --git a/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py b/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py index bfc2341..52d0f9c 100644 --- a/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py +++ b/dnacauldron/Fragment/StickyEndFragment/StickyEndSeq.py @@ -1,5 +1,13 @@ from Bio.Seq import Seq -from ...biotools import set_record_topology + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from .StickyEnd import StickyEnd @@ -21,47 +29,45 @@ def reverse_complement(self): left-right versions are interchanged and reverse complemented. """ - return StickyEndSeq( - str(Seq.reverse_complement(self)), - left_end=None - if self.right_end is None - else self.right_end.reverse_complement(), - right_end=None - if self.left_end is None - else self.left_end.reverse_complement(), - alphabet=self.alphabet, - ) + + if has_dna_alphabet: # Biopython <1.78 + sticky_end_seq = StickyEndSeq( + str(Seq.reverse_complement(self)), + left_end=None + if self.right_end is None + else self.right_end.reverse_complement(), + right_end=None + if self.left_end is None + else self.left_end.reverse_complement(), + alphabet=self.alphabet, + ) + else: + sticky_end_seq = StickyEndSeq( + str(Seq.reverse_complement(self)), + left_end=None + if self.right_end is None + else self.right_end.reverse_complement(), + right_end=None + if self.left_end is None + else self.left_end.reverse_complement(), + ) + + return sticky_end_seq def will_clip_in_this_order_with(self, other): """Return whether this sequence will clip in this order with another. """ - return ( - self.right_end is not None - ) and self.right_end.will_clip_directly_with(other.left_end) - - def circularized(self): - if not self.will_clip_in_this_order_with(self): - raise ValueError( - "Only constructs with two compatible sticky ends" - " can be circularized" - ) - result = Seq(str(self.left_end)) + self - set_record_topology(result, "circular") - return result + return (self.right_end is not None) and self.right_end.will_clip_directly_with( + other.left_end + ) def __repr__(self): content = Seq.__str__(self) if len(content) > 15: content = ( - content[:5].lower() - + ("(%d)" % len(content)) - + content[-5:].lower() + content[:5].lower() + ("(%d)" % len(content)) + content[-5:].lower() ) - return "(%s-%s-%s)" % ( - repr(self.left_end), - content, - repr(self.right_end), - ) + return "(%s-%s-%s)" % (repr(self.left_end), content, repr(self.right_end),) def __add__(self, other): assert self.will_clip_in_this_order_with(other) @@ -134,12 +140,9 @@ def list_from_sequence_digestion(sequence, enzyme, linear=True): sticky_fragments = [StickyEndSeq(fragments[0])] for f in fragments[1:]: overhang_bit, new_fragment_seq = f[:overhang], f[overhang:] - sticky_fragments[-1].right_end = StickyEnd( - overhang_bit, right_end_sign - ) + sticky_fragments[-1].right_end = StickyEnd(overhang_bit, right_end_sign) new_fragment = StickyEndSeq( - new_fragment_seq, - left_end=StickyEnd(overhang_bit, -right_end_sign), + new_fragment_seq, left_end=StickyEnd(overhang_bit, -right_end_sign), ) sticky_fragments.append(new_fragment) if not linear: @@ -163,26 +166,16 @@ def list_from_sequence_digestion(sequence, enzyme, linear=True): sticky_fragments[0].left_end = first_left_end sticky_fragments = [ StickyEndSeq( - new_fragment_seq, - left_end=left_end, - right_end=last_right_end, + new_fragment_seq, left_end=left_end, right_end=last_right_end, ) ] else: - sticky_fragments.append( - StickyEndSeq(fragments[-1], left_end=left_end) - ) - if ( - hasattr(sequence, "left_end") - and sticky_fragments[0].left_end is None - ): + sticky_fragments.append(StickyEndSeq(fragments[-1], left_end=left_end)) + if hasattr(sequence, "left_end") and sticky_fragments[0].left_end is None: sticky_fragments[0].left_end = sequence.left_end - if ( - hasattr(sequence, "right_end") - and sticky_fragments[-1].right_end is None - ): + if hasattr(sequence, "right_end") and sticky_fragments[-1].right_end is None: sticky_fragments[-1].right_end = sequence.right_end return sticky_fragments - + def ends_tuple(self): return (str(self.left_end), str(self.right_end)) diff --git a/dnacauldron/README.md b/dnacauldron/README.md index c4e58d3..f51752a 100644 --- a/dnacauldron/README.md +++ b/dnacauldron/README.md @@ -26,7 +26,7 @@ An ***AssemblyMix*** contains a list of ***Fragment*** instances (which subclass - A ***StickyEndFragmentMix***: - Such mixes contain a list of ***StickyEndFragment*** instances. - A ***StickyEndFragment*** has a ***StickyEndSeq*** (subclass of BioPython's *Seq* with additional ***StickyEnd*** sequences on the left and right). - - Usable Subclasses include ***RestrictionLigationMix***, ***Type2sRestrictionMix***, ***BASICAssemblyMix***. + - Usable Subclasses include ***RestrictionLigationMix***, ***Type2sRestrictionMix***, ***BASICAssemblyMix***. StickyEndFragments are generated by restriction mixes, for example by RestrictionLigationMix. - An ***HomologousFragmentMix***: - Such mixes contain a list of ***HomologousFragment*** instances. - Mixes also require a ***HomologyChecker*** to detect homologies. diff --git a/dnacauldron/biotools/record_operations.py b/dnacauldron/biotools/record_operations.py index ecccce7..2c1fe40 100644 --- a/dnacauldron/biotools/record_operations.py +++ b/dnacauldron/biotools/record_operations.py @@ -1,10 +1,18 @@ - from copy import copy -from Bio.Alphabet import DNAAlphabet + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.SeqFeature import SeqFeature, FeatureLocation + def complement(dna_sequence): """Return the complement of the DNA sequence. @@ -14,6 +22,7 @@ def complement(dna_sequence): """ return str(Seq(dna_sequence).complement()) + def set_record_topology(record, topology): """Set the Biopython record's topology, possibly passing if already set. @@ -29,9 +38,7 @@ def set_record_topology(record, topology): "default_to_linear", ] if topology not in valid_topologies: - raise ValueError( - "topology should be one of %s." % ", ".join(valid_topologies) - ) + raise ValueError("topology should be one of %s." % ", ".join(valid_topologies)) annotations = record.annotations default_prefix = "default_to_" if topology.startswith(default_prefix): @@ -62,20 +69,29 @@ def sequence_to_biopython_record( sequence, id="", name="same_as_id", features=() ): """Return a SeqRecord of the sequence, ready to be Genbanked.""" - return SeqRecord( - Seq(sequence, alphabet=DNAAlphabet()), - id=id, - name=id if name == "same_as_id" else name, - features=list(features), - ) + + if has_dna_alphabet: + seqrecord = SeqRecord( + Seq(sequence, alphabet=DNAAlphabet()), + id=id, + name=id if name == "same_as_id" else name, + features=list(features), + ) + else: + seqrecord = SeqRecord( + Seq(sequence), + id=id, + name=id if name == "same_as_id" else name, + features=list(features), + ) + + seqrecord.annotations["molecule_type"] = "DNA" + + return seqrecord def annotate_record( - seqrecord, - location="full", - feature_type="misc_feature", - margin=0, - **qualifiers + seqrecord, location="full", feature_type="misc_feature", margin=0, **qualifiers ): """Add a feature to a Biopython SeqRecord. @@ -83,19 +99,19 @@ def annotate_record( ---------- seqrecord - The biopython seqrecord to be annotated. + The Biopython seqrecord to be annotated. location Either (start, end) or (start, end, strand). (strand defaults to +1) feature_type - The type associated with the feature + The type associated with the feature. margin Number of extra bases added on each side of the given location. qualifiers - Dictionnary that will be the Biopython feature's `qualifiers` attribute. + Dictionary that will be the Biopython feature's `qualifiers` attribute. """ if location == "full": location = (margin, len(seqrecord) - margin) @@ -111,7 +127,7 @@ def annotate_record( def crop_record_with_saddling_features(record, start, end, filters=()): - """Crop the biopython record, but keep features that are only partially in. + """Crop the Biopython record, but keep features that are only partially in. Parameters ---------- @@ -119,11 +135,11 @@ def crop_record_with_saddling_features(record, start, end, filters=()): The Biopython record to crop. start, end - Coordinates of the segment to crop + Coordinates of the segment to crop. filters list of functions (feature=>True/False). Any feature that doesn't pass - at least one filter will be filtered out. + at least one filter will be filtered out. """ cropped = record[start:end] diff --git a/dnacauldron/biotools/sequence_io.py b/dnacauldron/biotools/sequence_io.py index 4c7e6c9..4ade0a5 100644 --- a/dnacauldron/biotools/sequence_io.py +++ b/dnacauldron/biotools/sequence_io.py @@ -5,7 +5,15 @@ import flametree from snapgene_reader import snapgene_file_to_seqrecord from Bio import SeqIO -from Bio.Alphabet import DNAAlphabet + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False from .record_operations import ( set_record_topology, sequence_to_biopython_record, @@ -45,7 +53,7 @@ def load_record( max_name_length=20, ): """Return a Biopython record read from a Fasta/Genbank/Snapgene file. - + Parameters ---------- @@ -56,16 +64,16 @@ def load_record( Can be "circular", "linear", "default_to_circular" (will default to circular if ``annotations['topology']`` is not already set) or "default_to_linear". - + id Sets the record.id. If "auto", the original record.id is used, and if none is set the name of the file (without extension) is used instead. - + upperize If true, the sequence will get upperized (recommended in this library, as the mix of upper and lower case can cause problems in Biopython's - enzyme sites search) - + enzyme site search). + max_name_length The name of the record will be truncated if too long to avoid Biopython exceptions being raised. @@ -95,7 +103,7 @@ def load_record( def _load_records_from_zip_file(zip_file, use_file_names_as_ids=False): """Return all fasta/genbank/snapgene in a zip as biopython records. - + Each record gets a ``source_file`` attribute from the zip's file name without the .zip extension. @@ -113,14 +121,10 @@ def _load_records_from_zip_file(zip_file, use_file_names_as_ids=False): except Exception: content_stream = BytesIO(f.read("rb")) try: - record = snapgene_file_to_seqrecord( - fileobject=content_stream - ) + record = snapgene_file_to_seqrecord(fileobject=content_stream) new_records, _ = [record], "snapgene" except Exception: - raise ValueError( - "Format not recognized for file " + f._path - ) + raise ValueError("Format not recognized for file " + f._path) single_record = len(new_records) == 1 for i, record in enumerate(new_records): @@ -168,24 +172,21 @@ def load_records_from_file(filepath): return records, fmt -def load_records_from_files( - files=None, folder=None, use_file_names_as_ids=False -): +def load_records_from_files(files=None, folder=None, use_file_names_as_ids=False): """Automatically convert files or a folder's content to biopython records. Parameters ---------- files - A list of path to files. A ``folder`` can be provided instead - + A list of path to files. A ``folder`` can be provided instead. + folder A path to a folder containing sequence files. - + use_file_names_as_ids If True, for every file containing a single record, the file name - (without extension) will be set as the record's ID. - + (without extension) will be set as the record's ID. """ if files is not None: for file in files: @@ -217,7 +218,11 @@ def load_records_from_files( "", "Exported", ] - record.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + record.seq.alphabet = DNAAlphabet() + record.annotations["molecule_type"] = "DNA" + # Sorry for this parts, it took a lot of "whatever works". # keep your part names under 20c and pointless, and everything # will be good @@ -235,11 +240,15 @@ def load_records_from_files( def write_record(record, target, fmt="genbank"): - """Write a record as genbank, fasta, etc. via Biopython, with fixes""" + """Write a record as genbank, fasta, etc. via Biopython, with fixes.""" record = deepcopy(record) record.id = record.id[:20] - if str(record.seq.alphabet.__class__.__name__) != "DNAAlphabet": - record.seq.alphabet = DNAAlphabet() + + if has_dna_alphabet: # Biopython <1.78 + if str(record.seq.alphabet.__class__.__name__) != "DNAAlphabet": + record.seq.alphabet = DNAAlphabet() + record.annotations["molecule_type"] = "DNA" + if hasattr(target, "open"): target = target.open("w") SeqIO.write(record, target, fmt) diff --git a/dnacauldron/version.py b/dnacauldron/version.py index 159d48b..0309ae2 100644 --- a/dnacauldron/version.py +++ b/dnacauldron/version.py @@ -1 +1 @@ -__version__ = "2.0.1" +__version__ = "2.0.2" diff --git a/pypi-readme.rst b/pypi-readme.rst index 85936fd..0d63cf7 100644 --- a/pypi-readme.rst +++ b/pypi-readme.rst @@ -3,18 +3,13 @@ DNA Cauldron DNA Cauldron provides a generic cloning simulation framework to predict constructs sequences and detect assembly flaws. -It can simulate Golden-Gate support, with extra features like enzyme or +It can simulate Golden-Gate, with extra features like enzyme or connector parts autoselection, and other methods like Gibson Assembly, LCR Assembly, BASIC assembly, BioBrick assembly. It supports single and combinatorial assemblies, hierarchical assemblies, and produces nice reports for traceability and troubleshooting. -.. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/DnaCauldron/master/docs/_static/images/report_elements.png - :alt: [logo] - :align: center - :width: 800px - Infos ----- @@ -28,11 +23,11 @@ Infos ``_ -**Github Page** +**Github Page:** ``_ -**Live demo** +**Live demo:** ``_ diff --git a/tests/test_fragments/test_fragments.py b/tests/test_fragments/test_fragments.py new file mode 100644 index 0000000..eb643fa --- /dev/null +++ b/tests/test_fragments/test_fragments.py @@ -0,0 +1,42 @@ +import pytest +from Bio.Seq import Seq +from dnacauldron.Fragment import ( + StickyEnd, + StickyEndSeq, + StickyEndFragment, +) + + +def test_StickyEnd(): + sticky_end = StickyEnd(Seq("ATGC"), strand=1) + assert sticky_end.__repr__() == "ATGC(+)" + + +def test_StickyEndSeq(): + sticky = StickyEndSeq( + Seq("AAA"), + left_end=StickyEnd("ATCG", strand=+1), + # RC of left end so that it self-anneals: + right_end=StickyEnd("ATCG", strand=-1), + ) + assert sticky.__repr__() == "(ATCG(+)-AAA-ATCG(-))" + + # Longer than 15 bp: + sticky = StickyEndSeq( + Seq("AAAATTTTCCCCGGGG"), + left_end=StickyEnd("ATCG", strand=+1), + right_end=StickyEnd("ATCG", strand=-1), + ) + assert sticky.__repr__() == "(ATCG(+)-aaaat(16)cgggg-ATCG(-))" + + +def test_StickyEndSeqFragment(): + sticky = StickyEndSeq( + Seq("TTT"), + left_end=StickyEnd("AAAA", strand=+1), + # Incompatible overhang: + right_end=StickyEnd("ATCG", strand=-1), + ) + sticky_fragment = StickyEndFragment(sticky) + with pytest.raises(ValueError): + sticky_fragment.circularized()