Merge pull request #5 from UBC-CS/restructure-repo

Towards #4 - start comparison on synthetic test functions (results not great)
UBC-CS · May 25, 2018 · 9ae99d0 · 9ae99d0
2 parents c6188d7 + eb1dd76
commit 9ae99d0
Show file tree

Hide file tree

Showing 14 changed files with 337 additions and 45 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,10 @@
+FROM ubuntu:latest
+
+WORKDIR ./home
+
+COPY . ./
+# Every install needs -y: docker build has no terminal to answer apt's
+# confirmation prompt, so an install without it aborts the build.
+RUN apt-get update && \
+    apt-get -y install python-pip && \
+    apt-get -y install python3
+
+RUN pip install -r requirements.txt
diff --git a/README.md b/README.md
@@ -1,2 +1,31 @@
 # lipo-python
-Implementing [Global optimization of Lipschitz functions](https://arxiv.org/abs/1703.02628).
+Implementing Global optimization of Lipschitz functions
+
+## Resources
+
+[Global optimization of Lipschitz functions](https://arxiv.org/abs/1703.02628).
+
+* C. Malherbe and N. Vayatis. "Global optimization of Lipschitz functions". ICML. 2314 - 2323. (2017)
+
+[BayesOpt](https://arxiv.org/abs/1405.7430)
+
+* R. Martinez-Cantin. "BayesOpt: A Bayesian Optimization Library for Nonlinear Optimization, Experimental Design and Bandits". CoRR. 1405.7430. (2014)
+
+[CMA-ES - Covariance Matrix Adaptation Evolution Strategy](https://www.researchgate.net/publication/227050324_The_CMA_Evolution_Strategy_A_Comparing_Review)
+
+* N. Hansen. The CMA Evolution Strategy: A Comparing Review. In J.A. Lozano, P. Larrañaga, I. Inza and E. Bengoetxea (Eds.). Towards a new evolutionary computation. Advances in estimation of distribution algorithms. Springer, pp. 75-102 (2006).
+
+**TODO: confirm that the link above points at the correct paper.**
+
+[CRS - Controlled Random Search with Local Mutation](https://link.springer.com/article/10.1007/s10957-006-9101-0)
+
+* P. Kaelo and M. M. Ali, "Some variants of the controlled random search algorithm for global optimization," J. Optim. Theory Appl. 130 (2), 253-264 (2006).
+
+[DIRECT](https://link.springer.com/article/10.1007/BF00941892)
+
+* D. R. Jones, C. D. Perttunen, and B. E. Stuckmann, "Lipschitzian optimization without the lipschitz constant," J. Optimization Theory and Applications, vol. 79, p. 157 (1993).
+
+[MLSL - Multi-Level Single-Linkage](https://link.springer.com/article/10.1007/BF02592071)  
+
+* A. H. G. Rinnooy Kan and G. T. Timmer, "Stochastic global optimization methods," Mathematical Programming, vol. 39, p. 27-78 (1987). (Actually 2 papers — part I: clustering methods, p. 27, then part II: multilevel methods, p. 57.)
+
diff --git a/data/first_comparison90.csv b/data/first_comparison90.csv
@@ -0,0 +1,3 @@
+,Holder Table,Rosenbrock,Linear Slope,Sphere,Deb N.1
+Adaptive LIPO,200.16 +/- 173.5,118.72 +/- 310.0,773.44 +/- 318.0,688.73 +/- 347.3,961.7 +/- 159.1
+Pure Random Search,171.04 +/- 160.6,158.46 +/- 353.6,747.1 +/- 347.1,717.49 +/- 344.8,939.76 +/- 198.3
diff --git a/docs/img/adalipo_prs.png b/docs/img/adalipo_prs.png
diff --git a/docs/img/holder_table_100x250.png b/docs/img/holder_table_100x250.png
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+numpy==1.14.2
+scipy==0.19.1
diff --git a/run.sh b/run.sh
@@ -0,0 +1,7 @@
+python src/synthetic-comparison.py --filename=data/simulation_results --num_sim=20 --num_iter=100
+
+python src/fig-5-generator.py --inputfile=data/simulation_results.pickle --outputfile=aggregate_results90 --target=0.9
+
+python src/fig-5-generator.py --inputfile=data/simulation_results.pickle --outputfile=aggregate_results95 --target=0.95
+
+python src/fig-5-generator.py --inputfile=data/simulation_results.pickle --outputfile=aggregate_results99 --target=0.99
diff --git a/src/__init__.py b/src/__init__.py
@@ -1,2 +1,3 @@
 from . import sequential
-from . import plotting
+from . import plotting
+from . import objective_functions
diff --git a/src/comparison.py b/src/comparison.py
@@ -7,7 +7,7 @@
 #
 # Usage: python comparison.py filename='imagename.png' [--num_sim, --num_iter]
 
-from sequential import lipo, prs
+from sequential import lipo, pure_random_search, adaptive_lipo
 from plotting import loss_v_iter
 
 import numpy as np
@@ -20,24 +20,19 @@
 parser.add_argument('--num_iter', type=int, default=100)
 args = parser.parse_args()
 
-#-------------------------------------------------------#
-# THE BOUNDS, FUNCTION, AND MINIMUM SHOULD BE ARGUMENTS #
-#-------------------------------------------------------#
+#---------------------------------------------#
+# THE BOUNDS AND FUNCTION SHOULD BE ARGUMENTS #
+#---------------------------------------------#
 
 if args.function == "holder_table":
 
     def f(x):
         inside_exp = np.abs(1-np.sqrt(x[0]*x[0]+x[1]*x[1])/np.pi)
-        return -np.abs(np.sin(x[0])*np.cos(x[1])*np.exp(inside_exp))
+        return np.abs(np.sin(x[0])*np.cos(x[1])*np.exp(inside_exp))
 
-    k = 40
+    #k = 40
     bnds = [(-10,10),(-10,10)]
 
-# an even simpler 1-d example
-#def g(x):
-#    return (np.cos(x) + 2*np.cos(np.pi*x) - np.sin(np.pi/2*x))
-#bnds = [(0,4*np.pi)]
-
 #----------------------------------------------------#
 # THE SEQUENTIAL STRATEGIES SHOULD ALSO BE ARGUMENTS #
 #----------------------------------------------------#
@@ -49,15 +44,15 @@ def main():
 
     for sim in np.arange(args.num_sim):
 
-        lipo_output = lipo(func=f, bounds=bnds, k=k, n=args.num_iter)
-        prs_output = prs(func=f, bounds=bnds, n=args.num_iter)
+        lipo_output = adaptive_lipo(func=f, bounds=bnds, n=args.num_iter)
+        prs_output = pure_random_search(func=f, bounds=bnds, n=args.num_iter)
 
         results_lipo[sim,:] = lipo_output['loss']
         results_prs[sim,:] = prs_output['loss']
 
     loss_v_iter(
         loss=[results_lipo, results_prs], 
-        names=['LIPO','PRS'],
+        names=['Adaptive LIPO','PRS'],
         color=['blue', 'orange'], 
         figsize=(20,10), 
         filename=args.filename

diff --git a/src/fig-5-generator.py b/src/fig-5-generator.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+
+from sequential import optimizers
+from objective_functions import synthetic_functions
+
+import numpy as np
+import argparse
+import pickle
+import pandas as pd
+from collections import defaultdict
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--inputfile', type=str)
+parser.add_argument('--outputfile', type=str)
+parser.add_argument('--target', type=float)
+args = parser.parse_args()
+
+def main():
+
+    with open(args.inputfile , 'rb') as stuff:
+        results = pickle.load(stuff)
+
+    table = recursive_dd()
+
+    # getting the maximum observed values for each test function
+    # as outlined on page 17-18 of the paper
+    for synthetic_name, synthetic_obj in synthetic_functions.items():
+        all_max = []
+        for optimizer_name, optimizer in optimizers.items():
+            N = len(results[optimizer_name][synthetic_name])
+            for sim in np.arange(N):
+                all_max.append(np.max(results[optimizer_name][synthetic_name][sim]['y']))
+        synthetic_obj['maximum'] = np.max(all_max)
+
+    # monte carlo estimate of the average value in the domain
+    # as outlined on pagge 17-18 of the paper
+    num_samples = 10**6
+
+    for synthetic_name, synthetic_obj in synthetic_functions.items():
+
+        bound_mins = np.array([bnd[0] for bnd in synthetic_obj['bnds']])
+        bound_maxs = np.array([bnd[1] for bnd in synthetic_obj['bnds']])
+
+        u = np.random.uniform(size=(num_samples, len(synthetic_obj['bnds'])))
+        x_samples = u * (bound_maxs - bound_mins) + bound_mins
+
+        y_samples = np.apply_along_axis(synthetic_obj['func'], axis=1, arr=x_samples)
+        synthetic_obj['avg'] = np.mean(y_samples)
+
+
+    for synthetic_name, synthetic_obj in synthetic_functions.items():
+        for optimizer_name, optimizer in optimizers.items():
+
+            N = len(results[optimizer_name][synthetic_name])
+            M = len(results[optimizer_name][synthetic_name][0]['y'])
+            cur_array = np.zeros((N, M))
+            for sim in np.arange(N):
+                cur_array[sim,:] = results[optimizer_name][synthetic_name][sim]['y']
+
+            # compute the target value we are looking for
+            # more thought needed here - the maximum is computed using the results
+            # according to the paper and the avg is computed by Monte Carlo 
+            cur_max = synthetic_obj['maximum']
+            cur_avg = synthetic_obj['avg']
+            target = cur_max - (cur_max - cur_avg) * (1 - args.target)
+
+            # find the number of iterations it took to reach target
+            # note: shouldchange the hard coded 1000 to passed argument
+            loc_pass_target = np.argmax(cur_array >= target, axis=1)
+            loc_pass_target[loc_pass_target == 0] = 1000
+
+            # add the computed results to the dictionary that we will 
+            # serialize and/or reformat to csv for writing
+            cur_results = {'mean': np.mean(loc_pass_target), 'std': np.std(loc_pass_target)}
+            table[optimizer_name][synthetic_name] = cur_results
+
+    # serialize
+    # note that if you want to load this serialized object you 
+    # need to have recursive_dd defined on the other end
+    with open(args.outputfile + '.pickle', 'wb') as place:
+        pickle.dump(table, place, protocol=pickle.HIGHEST_PROTOCOL)
+
+    csv_table = recursive_dd()
+    for optimizer_name, contents in table.items():
+        for func_name, results in contents.items():
+            csv_table[optimizer_name][func_name] = str(results['mean']) + ' +/- {0:.01f}'.format(results['std'])
+
+    df = pd.DataFrame.from_dict(csv_table).T
+    df = df[['Holder Table', 'Rosenbrock', 'Linear Slope', 'Sphere', 'Deb N.1']]
+
+    df.to_csv(args.outputfile + '.csv')
+
+
+def recursive_dd():
+    """
+    Return a defaultdict whose missing keys are created as further recursive_dd objects.
+
+    This allows nested assignment such as table[a][b] = value without
+    creating table[a] first. Objects built here are pickled in main(), and
+    loading them again requires recursive_dd to be defined on the loading side.
+    """
+    return defaultdict(recursive_dd)
+
+if __name__ == "__main__":
+    main()
diff --git a/src/objective_functions.py b/src/objective_functions.py
@@ -0,0 +1,57 @@
+"""
+Objective functions and domains for comparing the sequential algorithms
+"""
+
+import numpy as np
+
+def holder_table(x):
+    """x is a numpy array containing two numbers"""
+    if x.shape != (2,):
+        raise ValueError('Input array should be shape of (2,)')
+    inside_exp = np.abs(1-np.sqrt(x[0]*x[0]+x[1]*x[1])/np.pi)
+    return np.abs(np.sin(x[0])*np.cos(x[1])*np.exp(inside_exp))
+
+holder_bounds = [(-10,10),(-10,10)]
+
+def rosenbrock(x):
+    """x is a numpy array containing three numbers"""
+    if x.shape != (3,):
+        raise ValueError('Input array should be shape of (3,)')
+    first = 100*(x[1] - x[0]**2)**2 + (x[0] - 1)**2
+    second = 100*(x[2] - x[1]**2)**2 + (x[1] - 1)**2
+    return -np.sum(first + second)
+
+rosenbrock_bounds = [(-2.048,2.048), (-2.048,2.048), (-2.048,2.048)]
+
+def sphere(x):
+    """x is a numpy array containing four numbers"""
+    if x.shape != (4,):
+        raise ValueError('Input array should be shape of (4,)')
+    return -np.sqrt(np.sum((x - np.pi/16)**2))
+
+sphere_bounds = [(0,1), (0,1), (0,1), (0,1)]
+
+def linear_slope(x):
+    """x is a numpy array containing four numbers"""
+    if x.shape != (4,):
+        raise ValueError('Input array should be shape of (4,)')
+    coef = np.array([10**((i - 1)/4) for i in np.arange(4)])
+    return np.sum(coef*x)
+
+linear_slope_bounds = [(-5,5), (-5,5), (-5,5), (-5,5)]
+
+def deb_one(x):
+    """x is a numpy array containing five numbers"""
+    if x.shape != (5,):
+        raise ValueError('Input array should be shape of (5,)')
+    return (1/5)*np.sum(np.sin(5*np.pi*x)**6) 
+
+deb_one_bounds = [(-5,5), (-5,5), (-5,5), (-5,5), (-5,5)]
+
+synthetic_functions = {
+    'Holder Table' : {'func': holder_table, 'bnds': holder_bounds},
+    'Rosenbrock': {'func': rosenbrock, 'bnds': rosenbrock_bounds},
+    'Sphere': {'func': sphere, 'bnds': sphere_bounds},
+    'Linear Slope': {'func': linear_slope, 'bnds': linear_slope_bounds},
+    'Deb N.1': {'func': deb_one, 'bnds': deb_one_bounds}
+}
diff --git a/src/plotting.py b/src/plotting.py
@@ -5,6 +5,20 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
+# font control from here: https://stackoverflow.com/a/39566040/2566031
+
+SMALL_SIZE = 12
+MEDIUM_SIZE = 16
+BIGGER_SIZE = 20
+
+plt.rc('font', size=SMALL_SIZE)
+plt.rc('axes', titlesize=SMALL_SIZE)
+plt.rc('axes', labelsize=MEDIUM_SIZE)
+plt.rc('xtick', labelsize=SMALL_SIZE)
+plt.rc('ytick', labelsize=SMALL_SIZE) 
+plt.rc('legend', fontsize=SMALL_SIZE)
+plt.rc('figure', titlesize=BIGGER_SIZE)
+
 def loss_v_iter(loss, color, names, filename=None, q=(5,95), figsize=(10,5)):
     """
     Utility function for plotting loss versus iterations in experiments.