Skip to content

Commit

Permalink
Merge pull request #24 from padix-key/optimization
Browse files Browse the repository at this point in the history
New parameters for the optimizer
  • Loading branch information
padix-key authored Jul 19, 2022
2 parents 39cf0f6 + 009ddf1 commit 73c758c
Show file tree
Hide file tree
Showing 10 changed files with 149 additions and 119 deletions.
2 changes: 1 addition & 1 deletion LICENSE.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
BSD 3-Clause License
--------------------

Copyright 2019 - 2021, Patrick Kunzmann, Benjamin Mayer
Copyright 2019 - 2022, Patrick Kunzmann, Benjamin Mayer
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
Expand Down
9 changes: 6 additions & 3 deletions doc/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,15 @@ In order to increase the quality of the scheme tha amount of optimization steps
(``--nsteps``) or the number of runs (``--nruns``) can be increased.
However, increasing these values also extends the runtime of the optimization.
Note that ``--nruns`` can take advantage of multiple cores.
The number of used cores is set with ``--nthreads``

The simulated annealing can be adjusted even more fine grained by setting
the initial reverse temperature (``--beta``) and the rate of its exponential
growth (``--rate``). The step size decreases in the course of the simulated
the inverse temperature at the first (``--beta-start``) and last
(``--beta-end``) step of the optimization.
For the steps in between the inverse temperature is interpolated exponentially.
The step size decreases in the course of the simulated
annealing also in an exponential manner, which can be parameterized via
``--step-size-start`` and ``--step-size-end``.
``--stepsize-start`` and ``--stepsize-end``.
The seed for the random number generator used by the algorithm is set with
the ``--seed`` option.
However, these parameters address the more advanced users.
Expand Down
2 changes: 1 addition & 1 deletion doc/plotgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def plot_generator(function):
def plot_main_example_alignment():
scheme_file = biotite.temp_file("json")
gecli.main(args=[
"--seed", "0",
"--seed", "3",
"--matrix", "BLOSUM62",
"--lmin", "60",
"--lmax", "75",
Expand Down
7 changes: 1 addition & 6 deletions doc/theory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,7 @@ an exponential schedule for the step size
The step size is used for perturbing the current solution in each step of the
SA algorithm to find a new candidate solution.
So the idea for using the schedule here is to start with relatively large
step size :math:`\delta_{start}` and to chose the rate according to an
target step size :math:`\delta_{end}`.
An according rate is easily derived by claiming
:math:`\delta(N_{max})=\delta_{end}` which leads to

.. math:: \gamma = \frac{1}{N_{max}}\log \left( \frac{\delta_{end}}{\delta_{start}} \right).
step size and reduce it over the course of the simulation.


Monte-Carlo algorithm
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gecos"
version = "1.3.0"
version = "2.0.0"
description = "Generated color schemes for sequence alignment visualizations"
readme = "README.rst"
license = "BSD-3-Clause"
Expand Down
2 changes: 1 addition & 1 deletion src/gecos/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

__version__ = "1.3.0"
__version__ = "2.0.0"
__author__ = "Patrick Kunzmann"

from .colors import *
Expand Down
85 changes: 42 additions & 43 deletions src/gecos/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from multiprocessing import Pool
from os.path import join, dirname, realpath, isfile
import itertools
import argparse
import copy
import sys
Expand Down Expand Up @@ -37,22 +38,18 @@ class InputError(Exception):
pass


def _optimize(optline):
def _optimize(seed, alphabet, score_func, space, constraints,
nsteps, beta_start, beta_end, stepsize_start, stepsize_end):
"""
Worker function used for parallel execution of simulated annealing
optimizer with optline being a tuple containing the needed data
Worker function used for parallel execution of simulated annealing.
"""
(
alphabet, score_func, space, constraints,
nsteps, beta, rate, step_size_start, step_size_end,
seed
) = optline

np.random.seed(seed)
optimizer = ColorOptimizer(
alphabet, score_func, space, constraints
)
optimizer.optimize(nsteps, beta, rate, step_size_start, step_size_end)
optimizer.optimize(
nsteps, beta_start, beta_end, stepsize_start, stepsize_end
)
return optimizer.get_result()

@handle_error
Expand Down Expand Up @@ -201,29 +198,35 @@ def main(args=None, result_container=None, show_plots=True):
)
opt_group.add_argument(
"--nruns", default=16, type=int,
help="Number of parallel optimization algorithms runs that are to be "
"executed.",
help="Number of optimizations to run. "
"From these runs, the color scheme with the best score is "
"selected.",
metavar="NUMBER"
)
opt_group.add_argument(
"--nthreads", type=int,
help="Number of optimization runs to run in parallel. "
"By default this is equal to the number of CPUs.",
metavar="NUMBER"
)
opt_group.add_argument(
"--beta", default=1e-7, type=float,
help="Inverse start temperature for simulated annealing algorithm.",
"--beta-start", default=1, type=float,
help="Inverse temperature at start of simulated annealing.",
metavar="FLOAT",
)
opt_group.add_argument(
"--rate", default=1, type=float,
help="Rate controlling the exponential annealing schedule, "
"for the annealing of the inverse temperature.",
"--beta-end", default=500, type=float,
help="Inverse temperature at end of simulated annealing.",
metavar="FLOAT",
)
opt_group.add_argument(
"--step-size-start", default=20, type=float,
help="Start step size for simulated annealing algorithm.",
"--stepsize-start", default=10, type=float,
help="Step size temperature at start of simulated annealing.",
metavar="FLOAT",
)
opt_group.add_argument(
"--step-size-end", default=0.1, type=float,
help="End step size for simulated annealing algorithm.",
"--stepsize-end", default=0.2, type=float,
help="Step size temperature at end of simulated annealing.",
metavar="FLOAT",
)

Expand Down Expand Up @@ -318,30 +321,26 @@ def main(args=None, result_container=None, show_plots=True):

score_func = DefaultScoreFunction(matrix, args.contrast, args.delta)

# Simulated annealing
n_parallel = args.nruns
# Simulated annealing
if args.seed is not None:
np.random.seed(int(args.seed))
# Different randomly seed for each run
seeds = np.random.randint(0, 1000000, size=n_parallel)
opt_data = [
(
matrix.get_alphabet1(),
score_func,
space,
constraints,
args.nsteps,
args.beta,
args.rate,
args.step_size_start,
args.step_size_end,
seeds[i])
for i in range(n_parallel)
]

with Pool(n_parallel) as p:
results = p.map(_optimize, opt_data)
best_result = sorted(results, key=lambda x: x.score)[0]
# Different random seed for each run
seeds = np.random.randint(0, 1000000, size=args.nruns)

with Pool(args.nthreads) as p:
results = p.starmap(_optimize, zip(
seeds,
itertools.repeat(matrix.get_alphabet1()),
itertools.repeat(score_func),
itertools.repeat(space),
itertools.repeat(constraints),
itertools.repeat(args.nsteps),
itertools.repeat(args.beta_start),
itertools.repeat(args.beta_end),
itertools.repeat(args.stepsize_start),
itertools.repeat(args.stepsize_end),
))
best_result = min(results, key=lambda x: x.score)

scores = np.array([result.scores for result in results])
scores_mean = np.mean(scores, axis=0)
Expand Down
106 changes: 49 additions & 57 deletions src/gecos/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,78 +166,62 @@ def _set_coordinates(self, coord, score=None):
score = self._score_func(coord)
self._scores.append(score)

def optimize(self, n_steps,
beta_start, rate_beta, stepsize_start, stepsize_end):
def optimize(self, n_steps=20000,
beta_start=1, beta_end=500,
stepsize_start=10, stepsize_end=0.2):
r"""
Perform a Simulated Annealing optimization on the current
Perform a simulated annealing optimization on the current
coordinate to minimize the score returned by the score function.
This is basically a Monte-Carlo optimization where the
temperature is varied according to a so called annealing
schedule over the course of the optimization.
This is basically a Metropolis-Monte-Carlo optimization where
the inverse temperature and step size is varied according to
exponential annealing schedule over the course of the
optimization.
Parameters
----------
n_steps : int
The number of simulated annealing steps.
beta_start, beta_end : float
The inverse temperature in the first and last step of the
optimization, respectively.
Higher values allow less increase of score, i.e. result
in a steering into the local minimum.
Must be positive.
stepsize_start, stepsize_end : float
The step size in the first and last step of the
optimization, respectively.
it is the radius in which the coordinates are randomly
altered at in each optimization step.
Must be positive.
Notes
-----
The algorithm is a heuristic thats motivated by the physical
process of annealing.
If we, e.g., cool steel than a slow cooling can yield a superior
quality, whereas for a fast cooling the steel can become
brittle.
The same happens here within the search space for the given
minimization task.
Parameters
----------
n_steps : int
The number of Simulated-Annealing steps.
beta_start : float
The inverse start temperature, where the start temperature
would be :math:`T_{start} = 1/(k_b \cdot \beta_{start})` with
:math:`k_b` being the boltzmann constant.
rate_beta: float
The rate controls how fast the inverse temperature is
increased within the annealing schedule.
Here the exponential schedule is chosen so we have
:math:`\beta (t) = \beta_0 \cdot \exp(rate \cdot t)`.
stepsize_start : float
The radius in which the coordinates are randomly altered at
the beginning of the simulated anneling algorithm.
Like the inverse temperature the step size follows an
exponential schedule, enabling the algorithm
to do large perturbartions at the beginning of the algorithm
run and increasingly smaller ones afterwards.
stepsize_end : float
The radius in which the coordinates are randomly altered at
the end of the simulated annealing algorithm run.
minimization task.
"""
# Calculate the max value 'i' can reach so that
# 'np.exp(rate_beta*i)' does not overflow
max_i = np.log(np.finfo(np.float64).max) / rate_beta
beta = lambda i: beta_start*np.exp(rate_beta*i) \
if i < max_i else np.inf

# Choose rate so that stepsize_end reached after n_steps
# derived from step_size(N_steps) = steps_end
if stepsize_start == stepsize_end:
rate_stepsize = 0
else:
rate_stepsize = np.log(stepsize_end / stepsize_start) / n_steps
step_size = lambda i: stepsize_start * np.exp(rate_stepsize * i)
betas = _calculate_schedule(n_steps, beta_start, beta_end)
stepsizes = _calculate_schedule(n_steps, stepsize_start, stepsize_end)

for i in range(n_steps):

score = self._scores[-1]
new_coord = self._sample_coord(
self._coord,
lambda c: c + (random.rand(*c.shape)-0.5) * 2 * step_size(i)
lambda c: c + (random.rand(*c.shape)-0.5) * 2 * stepsizes[i]
)
new_score = self._score_func(new_coord)

if new_score < score:
self._set_coordinates(new_coord, new_score)

else:
p_accept = np.exp( -beta(i) * (new_score-score))
p = random.rand()

if p <= p_accept:
p_accept = np.exp( -betas[i] * (new_score-score))
if random.rand() <= p_accept:
self._set_coordinates(new_coord, new_score)
else:
self._set_coordinates(self._coord, score)
Expand Down Expand Up @@ -292,6 +276,15 @@ def _apply_constraints(self, coord):
coord[self._constraint_mask] = self._constraints[self._constraint_mask]


def _calculate_schedule(n_steps, start, end):
"""
Calculate the values for each step in an exponential schedule.
"""
# Use float 64
return start * (end/start)**np.linspace(0, 1, n_steps, dtype=np.float64)



class ScoreFunction(metaclass=abc.ABCMeta):
"""
Abstract base class for a score function.
Expand Down Expand Up @@ -396,19 +389,18 @@ def __call__(self, coord):
@staticmethod
def _calculate_distances(tri_indices, coord, distance_formula):
ind1, ind2 = tri_indices
if distance_formula == "CIEDE76":
dist = skimage.color.deltaE_ciede94(
coord[ind1], coord[ind2]
if distance_formula == "CIE76":
return np.sqrt(
np.sum((coord[ind1, :] - coord[ind2, :])**2, axis=-1)
)
elif distance_formula == "CIEDE94":
dist = skimage.color.deltaE_cie76(
return skimage.color.deltaE_ciede94(
coord[ind1], coord[ind2]
)
else: #"CIEDE2000"
dist = skimage.color.deltaE_ciede2000(
return skimage.color.deltaE_ciede2000(
coord[ind1], coord[ind2]
)
return dist

@staticmethod
def _calculate_ideal_distances(tri_indices, substitution_matrix):
Expand All @@ -419,4 +411,4 @@ def _calculate_ideal_distances(tri_indices, substitution_matrix):
distances = dist_matrix[ind_i, ind_j]
# Scale, so that average distance is 1
distances /= np.average(distances)
return distances
return distances
Loading

0 comments on commit 73c758c

Please sign in to comment.