Skip to content

Commit

Permalink
Fix #86
Browse files Browse the repository at this point in the history
  • Loading branch information
veghp committed Jan 27, 2025
1 parent c429d82 commit 251d33e
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 52 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ The example below will generate a random sequence and optimize it so that:
EnforceTranslation(location=(500, 1400))
],
objectives=[CodonOptimize(species='e_coli', location=(500, 1400))]
)
) # Note: always use a codon optimisation specification with EnforceTranslation
# SOLVE THE CONSTRAINTS, OPTIMIZE WITH RESPECT TO THE OBJECTIVE
Expand Down
30 changes: 14 additions & 16 deletions dnachisel/builtin_specifications/EnforceTranslation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class EnforceTranslation(CodonSpecification):
Shorthand for annotations: "cds".
Note: always use a codon optimization specification with EnforceTranslation.
Parameters
-----------
Expand Down Expand Up @@ -98,10 +101,7 @@ def set_location(self, location):
len(location) != 3 * len(self.translation)
):
raise ValueError(
(
"Window size (%d bp) incompatible with translation "
"(%d aa)"
)
("Window size (%d bp) incompatible with translation " "(%d aa)")
% (len(location), len(self.translation))
)
self.location = location
Expand All @@ -119,10 +119,7 @@ def initialized_on_problem(self, problem, role):
result = result.copy_with_changes(translation=translation)
if len(result.location) != 3 * len(result.translation):
raise ValueError(
(
"Window size (%d bp) incompatible with translation "
"(%d aa)"
)
("Window size (%d bp) incompatible with translation " "(%d aa)")
% (len(result.location), len(result.translation))
)
if (result.start_codon is not None) and result.translation[0] != "M":
Expand Down Expand Up @@ -163,9 +160,11 @@ def evaluate(self, problem):
problem,
score=-len(errors_locations),
locations=errors_locations,
message="All OK."
if len(errors_locations) == 0
else "Wrong translation at locations %s" % errors_locations,
message=(
"All OK."
if len(errors_locations) == 0
else "Wrong translation at locations %s" % errors_locations
),
)

def localized_on_window(self, new_location, start_codon, end_codon):
Expand All @@ -179,7 +178,7 @@ def localized_on_window(self, new_location, start_codon, end_codon):
translation=new_translation,
boost=self.boost,
genetic_table=self.genetic_table,
start_codon=self.start_codon if location_is_at_start else None
start_codon=self.start_codon if location_is_at_start else None,
# has_start_codon=self.has_start_codon and location_is_at_start,
)

Expand All @@ -196,11 +195,10 @@ def get_first_codon_choices(first_codon):
return [first_codon]
else:
return [self.start_codon] # "ATG"

first_codon_location = self.codon_index_to_location(0)
first_codon = first_codon_location.extract_sequence(sequence)
choices = [
(first_codon_location, get_first_codon_choices(first_codon))
] + [
choices = [(first_codon_location, get_first_codon_choices(first_codon))] + [
(self.codon_index_to_location(i), self.backtranslation_table[aa])
for i, aa in list(enumerate(self.translation))[1:]
]
Expand All @@ -221,6 +219,6 @@ def __str__(self):

def short_label(self):
return "cds"

def breach_label(self):
return "protein sequence changed"
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ class AvoidRareCodons(BaseCodonOptimizationClass):
This can be seen as a "mild" form of codon optimization where only rare
codons (which slow down protein synthesis) are considered.
WARNING: Make sure to always use this specification with EnforceTranslation
to preserve the amino-acid sequence.
Warning: always use this specification with an EnforceTranslation constraint
defined over the same location, to preserve the amino acid sequence.
Shorthand for annotations: "no_rare_codons".
Expand All @@ -25,7 +25,7 @@ class AvoidRareCodons(BaseCodonOptimizationClass):
Name or TaxID of the species for which to optimize the sequence. A custom
codon_usage_table can be provided instead (or in addition, for species
names whose codon usage table cannot be imported).
codon_usage_table
Optional codon usage table of the species for which the sequence will be
codon-optimized, which can be provided instead of ``species``. A dict of
Expand Down Expand Up @@ -111,17 +111,17 @@ def evaluate(self, problem):
problem,
score=score,
locations=locations,
message="All OK."
if len(locations) == 0
else "Rare codons at locations %s" % locations,
message=(
"All OK."
if len(locations) == 0
else "Rare codons at locations %s" % locations
),
)

def restrict_nucleotides(self, sequence, location=None):
nonrare_codons = list(self.nonrare_codons)
if self.location.strand == -1:
nonrare_codons = sorted(
[reverse_complement(c) for c in nonrare_codons]
)
nonrare_codons = sorted([reverse_complement(c) for c in nonrare_codons])
return [
((i, i + 3), nonrare_codons)
for i in range(self.location.start, self.location.end, 3)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def CodonOptimize(
codon_usage_table=None,
original_species=None,
original_codon_usage_table=None,
boost=1.0
boost=1.0,
):
"""Codon-optimize a coding sequence using a user-selected method.
Expand All @@ -28,6 +28,10 @@ def CodonOptimize(
codon whose usage in the target organism matches the usage of the
original codon in its host organism (as per Claassens 2017).
Warning: always use this specification with an EnforceTranslation constraint
defined over the same location, to preserve the amino acid sequence.
Parameters
==========
species
Expand Down Expand Up @@ -105,5 +109,7 @@ def CodonOptimize(
original_codon_usage_table=original_codon_usage_table,
boost=boost,
)
raise ValueError("`method` must be 'use_best_codon', 'match_codon_usage' "
f"or 'harmonize_rca', not {method!r}")
raise ValueError(
"`method` must be 'use_best_codon', 'match_codon_usage' "
f"or 'harmonize_rca', not {method!r}"
)
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class HarmonizeRCA(BaseCodonOptimizationClass):
algorithm (Angov 2008), which was much more complicated as it involved
predicting "ribosome pausing" sites in the sequence.
Warning: always use with an EnforceTranslation constraint.
Warning: always use this specification with an EnforceTranslation constraint
defined over the same location, to preserve the amino acid sequence.
Parameters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ class MatchTargetCodonUsage(BaseCodonOptimizationClass):
host-to-target codon harmonization. See DnaChisel's HarmonizeRCA class
for Codon Harmonization.
Warning: always use this specification with an EnforceTranslation constraint
defined over the same location, to preserve the amino acid sequence.
Parameters
----------
Expand Down Expand Up @@ -71,9 +75,7 @@ class MatchTargetCodonUsage(BaseCodonOptimizationClass):

shorthand_name = "match_codon_usage"

def __init__(
self, species=None, location=None, codon_usage_table=None, boost=1.0
):
def __init__(self, species=None, location=None, codon_usage_table=None, boost=1.0):
BaseCodonOptimizationClass.__init__(
self,
species=species,
Expand Down Expand Up @@ -130,8 +132,7 @@ def evaluate(self, problem):
problem,
score=score,
locations=locations,
message="Codon opt. on window %s scored %.02E"
% (self.location, score),
message="Codon opt. on window %s scored %.02E" % (self.location, score),
)

def localized_on_window(self, new_location, start_codon, end_codon):
Expand Down Expand Up @@ -177,9 +178,7 @@ def compare_frequencies(self, codons, text_mode=False):
for i, codon in enumerate(codons):
codons_positions[codon].append(i)
# aa: amino-acid
codons_frequencies = {
aa: {"total": 0} for aa in self.codon_usage_table
}
codons_frequencies = {aa: {"total": 0} for aa in self.codon_usage_table}
for codon, positions in codons_positions.items():
count = len(positions)
aa = self.codons_translations[codon]
Expand All @@ -191,9 +190,7 @@ def compare_frequencies(self, codons, text_mode=False):
if codon != "total":
data[codon] = 1.0 * value / total
codons_frequencies = {
aa: data
for aa, data in codons_frequencies.items()
if data["total"]
aa: data for aa, data in codons_frequencies.items() if data["total"]
}
comparisons = {
aa: {
Expand All @@ -209,8 +206,9 @@ def compare_frequencies(self, codons, text_mode=False):
return dict_to_pretty_string(comparisons)
else:
return codons_positions, comparisons

def short_label(self):
result = "match-codon-usage"
if self.species is not None:
result += " (%s)" % self.species
return result
return result
18 changes: 8 additions & 10 deletions dnachisel/builtin_specifications/codon_optimization/MaximizeCAI.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ class MaximizeCAI(BaseCodonOptimizationClass):
This score ranges from negative infinity to 0 (0 meaning a perfectly optimal sequence).
Warning: always use this specification with an EnforceTranslation constraint
defined over the same location, to preserve the amino acid sequence.
Parameters
----------
Expand Down Expand Up @@ -65,9 +69,7 @@ class MaximizeCAI(BaseCodonOptimizationClass):

shorthand_name = "use_best_codon"

def __init__(
self, species=None, location=None, codon_usage_table=None, boost=1.0
):
def __init__(self, species=None, location=None, codon_usage_table=None, boost=1.0):
BaseCodonOptimizationClass.__init__(
self,
species=species,
Expand Down Expand Up @@ -105,12 +107,10 @@ def evaluate(self, problem):
problem,
score=freq - optimal,
locations=[] if (freq == optimal) else [self.location],
message="Codon opt. on window %s scored %.02E"
% (self.location, score),
message="Codon opt. on window %s scored %.02E" % (self.location, score),
)
current_usage = [
self.codon_usage_table["log_codons_frequencies"][codon]
for codon in codons
self.codon_usage_table["log_codons_frequencies"][codon] for codon in codons
]
optimal_usage = [
self.codon_usage_table["log_best_frequencies"][ct[codon]]
Expand All @@ -125,8 +125,7 @@ def evaluate(self, problem):
problem,
score=score,
locations=locations,
message="Codon opt. on window %s scored %.02E"
% (self.location, score),
message="Codon opt. on window %s scored %.02E" % (self.location, score),
)

def label_parameters(self):
Expand All @@ -137,4 +136,3 @@ def short_label(self):
if self.species is not None:
result += " (%s)" % self.species
return result

3 changes: 3 additions & 0 deletions dnachisel/builtin_specifications/codon_optimization/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ optimization that one can find in the literature.

Finally, ``CodonOptimize`` is a generic pseudo-specification-class which uses a ``method``
parameter to return a specification of one of the above classes.

Warning: always use these specifications with an EnforceTranslation constraint defined
over the same location, to preserve the amino acid sequence.

0 comments on commit 251d33e

Please sign in to comment.