diff --git a/.gitignore b/.gitignore
index 01ff82ea..b7483858 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,7 @@
*.py[cod]
# Optimizers
-HPOlib/optimizers/smac_2_06_01-dev/*
-HPOlib/optimizers/spearmint_april2013_mod/*
-HPOlib/optimizers/hyperopt_august2013_mod/*
+*_src*
# Runsolver
runsolver/src/*
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..8c18b4b8
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "optimizers/spearmint/spearmint_gitfork_mod_src"]
+ path = optimizers/spearmint/spearmint_gitfork_mod_src
+ url = https://github.com/automl/spearmint.git
diff --git a/HPOlib/Experiment.py b/HPOlib/Experiment.py
index 0c64b64e..2118682c 100644
--- a/HPOlib/Experiment.py
+++ b/HPOlib/Experiment.py
@@ -19,8 +19,6 @@
import cPickle
import logging
import os
-import scipy
-from scipy.stats.distributions import wrapcauchy_gen
import sys
import tempfile
diff --git a/HPOlib/Locker.py b/HPOlib/Locker.py
index e80b198a..3e3aadee 100755
--- a/HPOlib/Locker.py
+++ b/HPOlib/Locker.py
@@ -61,7 +61,7 @@ def unlock(self, filename):
if self.locks[filename] == 1:
success = safe_delete('%s.lock' % (filename))
if not success:
- logger.log("Could not unlock file: %s.\n", filename)
+ logger.error("Could not unlock file: %s.\n", filename)
del self.locks[filename]
return success
else:
diff --git a/HPOlib/Plotting/doAllPlots.py b/HPOlib/Plotting/doAllPlots.py
index 42132d2b..f5049105 100644
--- a/HPOlib/Plotting/doAllPlots.py
+++ b/HPOlib/Plotting/doAllPlots.py
@@ -227,23 +227,16 @@ def main():
_box_whisker(pkl_list=pkl_list, name_list=name_list, save=tmp_save,
log=log, cut=args.cut)
- # statistics
- if save_dir is not "":
- tmp_save = os.path.join(save_dir, "statistics_%s.txt" % time_str)
- else:
- tmp_save = save_dir
- sys.stdout.write("statistics.py ... %s ..." % tmp_save)
- _statistics(pkl_list=pkl_list, name_list=name_list, save=tmp_save,
- log=log, cut=args.cut)
-
# LaTeX table
if save_dir is not "":
tmp_save = os.path.join(save_dir, "table_%s.tex" % time_str)
else:
tmp_save = save_dir
sys.stdout.write("generateTexTable.py ... %s ..." % tmp_save)
- _generate_tex_table(pkl_list=pkl_list, name_list=name_list,
+ ret = _generate_tex_table(pkl_list=pkl_list, name_list=name_list,
save=tmp_save, log=log, cut=args.cut)
+ if ret is not None:
+ print ret
# We can always plot this
@@ -256,6 +249,15 @@ def main():
_optimizer_overhead(pkl_list=pkl_list, name_list=name_list, save=tmp_save,
log=log, cut=args.cut)
+ # statistics
+ if save_dir is not "":
+ tmp_save = os.path.join(save_dir, "statistics_%s.txt" % time_str)
+ else:
+ tmp_save = save_dir
+ sys.stdout.write("statistics.py ... %s ..." % tmp_save)
+ _statistics(pkl_list=pkl_list, name_list=name_list, save=tmp_save,
+ log=log, cut=args.cut)
+
# Error Trace with Std
if save_dir is not "":
tmp_save = os.path.join(save_dir, "TraceWithStd_perEval_%s.%s" % (time_str, args.file))
diff --git a/HPOlib/Plotting/generateTexTable.py b/HPOlib/Plotting/generateTexTable.py
index 76cff0ec..c1f0d119 100644
--- a/HPOlib/Plotting/generateTexTable.py
+++ b/HPOlib/Plotting/generateTexTable.py
@@ -31,6 +31,7 @@
jinja2 = ""
from HPOlib.Plotting import plot_util
+from HPOlib import wrapping_util
__authors__ = ["Katharina Eggensperger", "Matthias Feurer"]
__contact__ = "automl.org"
@@ -42,7 +43,7 @@
\\usepackage[landscape]{geometry}
\\usepackage{multirow} % import command \multicolmun
\\usepackage{tabularx} % Convenient table formatting
-\\usepackage{booktabs} % provides \toprule, \midrule and \bottomrule
+\\usepackage{booktabs} % provides \\toprule, \\midrule and \\bottomrule
\\begin{document}
@@ -63,43 +64,57 @@
{{ experiment }} & {{ evals }}
{%- for name in result_values -%}
{%- set results = result_values[name] -%}
-{{ ' & ' }}{{ results['mean']|round(3, 'floor') }}$\\pm${{ results['std']|round(3, 'floor')}} & {{ results['min']|round(3, 'floor') }}
-{%- endfor %} \\\\
+{{ ' & ' }}{% if results['mean_best'] == True %}\\textbf{ {%- endif %}{{results['mean']|round(3, 'floor') }}{% if results['mean_best'] == True %}}{% endif %}$\\pm${{ results['std']|round(3, 'floor')}} & {{results['min']|round(3, 'floor') }}{%- endfor %} \\\\
\\bottomrule
\\end{tabularx}
\\end{table}
\\end{document}
"""
-def main(pkl_list, name_list, save, cut=sys.maxint):
+def main(pkl_list, name_list, save="", cut=sys.maxint,
+ template_string=template_string, experiment_name="Name",
+ num_evals="\\#eval"):
+ pickles = plot_util.load_pickles(name_list, pkl_list)
+ best_dict, idx_dict, keys = plot_util.get_best_dict(name_list, pickles, cut)
+ return generate_tex_template(best_dict, name_list,
+ template_string=template_string, save=save,
+ num_evals=num_evals, experiment_name=experiment_name)
+
+
+def generate_tex_template(best_dict, name_list, save="",
+ template_string=template_string, experiment_name="Name",
+ num_evals="\\#eval"):
tex = StringIO()
result_values = OrderedDict([(name[0], dict()) for name in name_list])
- best_dict, idx_dict, keys = plot_util.read_pickles(name_list, pkl_list, cut)
+ means = [np.mean(best_dict[name]) for name in result_values]
+ stds = [np.std(best_dict[name]) for name in result_values]
+ mins = [np.min(best_dict[name]) for name in result_values]
+ maxs = [np.max(best_dict[name]) for name in result_values]
for name in result_values:
values = result_values[name]
values["mean"] = np.mean(best_dict[name])
- values["mean_bold"] = False
- values["mean_italic"] = False
+ values["mean_best"] = True if \
+ wrapping_util.float_eq(values["mean"], min(means)) else False
values["std"] = np.std(best_dict[name])
- values["std_bold"] = False
- values["std_italic"] = False
+ values["std_best"] = True if \
+ wrapping_util.float_eq(values["std"], min(stds)) else False
values["min"] = np.min(best_dict[name])
- values["min_bold"] = False
- values["min_italic"] = False
+ values["min_best"] = True if\
+ wrapping_util.float_eq(values["min"], min(mins)) else False
- values["max"] = np.min(best_dict[name])
- values["max_bold"] = False
- values["max_italic"] = False
+ values["max"] = np.max(best_dict[name])
+ values["max_best"] = True if\
+ wrapping_util.float_eq(values["max"], min(maxs)) else False
if jinja2:
template = Template(template_string)
tex.write(template.render(result_values=result_values,
- experiment="Name", evals="\\#evals"))
+ experiment=experiment_name, evals=num_evals))
else:
tex.write("Name & #evals")
for name in result_values:
@@ -119,7 +134,7 @@ def main(pkl_list, name_list, save, cut=sys.maxint):
with open(save, "w") as fh:
fh.write(table)
else:
- print table
+ return table
if __name__ == "__main__":
diff --git a/HPOlib/Plotting/plotTrace_perExp.py b/HPOlib/Plotting/plotTrace_perExp.py
new file mode 100644
index 00000000..85ada58c
--- /dev/null
+++ b/HPOlib/Plotting/plotTrace_perExp.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+##
+# wrapping: A program making it easy to use hyperparameter
+# optimization software.
+# Copyright (C) 2013 Katharina Eggensperger and Matthias Feurer
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from argparse import ArgumentParser
+import cPickle
+import itertools
+import sys
+
+from matplotlib.pyplot import tight_layout, figure, subplots_adjust, subplot, savefig, show
+import matplotlib.gridspec
+import numpy as np
+
+from HPOlib.Plotting import plot_util
+
+__authors__ = ["Katharina Eggensperger", "Matthias Feurer"]
+__contact__ = "automl.org"
+
+
+def plot_optimization_trace_cv(trial_list, name_list, optimum=0, title="",
+ log=True, save="", y_max=0, y_min=0):
+ markers = plot_util.get_plot_markers()
+ colors = plot_util.get_plot_colors()
+ linestyles = itertools.cycle(['-'])
+ size = 1
+
+ ratio = 5
+ gs = matplotlib.gridspec.GridSpec(ratio, 1)
+ fig = figure(1, dpi=100)
+ fig.suptitle(title, fontsize=16)
+ ax1 = subplot(gs[0:ratio, :])
+ ax1.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)
+ min_val = sys.maxint
+ max_val = -sys.maxint
+ max_trials = 0
+
+ fig.suptitle(title, fontsize=16)
+
+ # Plot the average error and std
+ for i in range(len(name_list)):
+ m = markers.next()
+ c = colors.next()
+ l = linestyles.next()
+ leg = False
+ for tr in trial_list[i]:
+ if log:
+ tr = np.log10(tr)
+ x = range(1, len(tr)+1)
+ y = tr
+ if not leg:
+ ax1.plot(x, y, color=c, linewidth=size, linestyle=l, label=name_list[i][0])
+ leg = True
+ ax1.plot(x, y, color=c, linewidth=size, linestyle=l)
+ min_val = min(min_val, min(tr))
+ max_val = max(max_val, max(tr))
+ max_trials = max(max_trials, len(tr))
+
+ # Maybe plot on logscale
+ ylabel = ""
+
+ if log:
+ ax1.set_ylabel("log10(Minfunction value)" + ylabel)
+ else:
+ ax1.set_ylabel("Minfunction value" + ylabel)
+
+ # Descript and label the stuff
+ leg = ax1.legend(loc='best', fancybox=True)
+ leg.get_frame().set_alpha(0.5)
+ ax1.set_xlabel("#Function evaluations")
+
+ if y_max == y_min:
+ # Set axes limits
+ ax1.set_ylim([min_val-0.1*abs((max_val-min_val)), max_val+0.1*abs((max_val-min_val))])
+ else:
+ ax1.set_ylim([y_min, y_max])
+ ax1.set_xlim([0, max_trials + 1])
+
+ tight_layout()
+ subplots_adjust(top=0.85)
+ if save != "":
+ savefig(save, dpi=100, facecolor='w', edgecolor='w',
+ orientation='portrait', papertype=None, format=None,
+ transparent=False, bbox_inches="tight", pad_inches=0.1)
+ else:
+ show()
+
+
+def main(pkl_list, name_list, autofill, optimum=0, save="", title="", log=False,
+ y_min=0, y_max=0):
+
+ trial_list = list()
+ for i in range(len(pkl_list)):
+ tmp_trial_list = list()
+ max_len = -sys.maxint
+ for pkl in pkl_list[i]:
+ fh = open(pkl, "r")
+ trials = cPickle.load(fh)
+ fh.close()
+
+ trace = plot_util.get_Trace_cv(trials)
+ tmp_trial_list.append(trace)
+ max_len = max(max_len, len(trace))
+ trial_list.append(list())
+ for tr in tmp_trial_list:
+ # if len(tr) < max_len:
+ # tr.extend([tr[-1] for idx in range(abs(max_len - len(tr)))])
+ trial_list[-1].append(np.array(tr))
+
+ plot_optimization_trace_cv(trial_list, name_list, optimum, title=title, log=log,
+ save=save, y_min=y_min, y_max=y_max)
+
+ if save != "":
+ sys.stdout.write("Saved plot to " + save + "\n")
+ else:
+ sys.stdout.write("..Done\n")
+
+if __name__ == "__main__":
+ prog = "python plotTrace_perExp.py WhatIsThis [WhatIsThis ]"
+ description = "Plot a Trace with std for multiple experiments"
+
+ parser = ArgumentParser(description=description, prog=prog)
+
+ # Options for specific benchmarks
+ parser.add_argument("-o", "--optimum", type=float, dest="optimum",
+ default=0, help="If not set, the optimum is supposed to be zero")
+
+ # Options which are available only for this plot
+ parser.add_argument("-a", "--autofill", action="store_true", dest="autofill",
+ default=False, help="Fill trace automatically")
+
+ # General Options
+ parser.add_argument("-l", "--log", action="store_true", dest="log",
+ default=False, help="Plot on log scale")
+ parser.add_argument("--max", dest="max", type=float,
+ default=0, help="Maximum of the plot")
+ parser.add_argument("--min", dest="min", type=float,
+ default=0, help="Minimum of the plot")
+ parser.add_argument("-s", "--save", dest="save",
+ default="", help="Where to save plot instead of showing it?")
+ parser.add_argument("-t", "--title", dest="title",
+ default="", help="Optional supertitle for plot")
+
+ args, unknown = parser.parse_known_args()
+
+ sys.stdout.write("\nFound " + str(len(unknown)) + " arguments\n")
+
+ pkl_list_main, name_list_main = plot_util.get_pkl_and_name_list(unknown)
+
+ main(pkl_list=pkl_list_main, name_list=name_list_main, autofill=args.autofill, optimum=args.optimum,
+ save=args.save, title=args.title, log=args.log, y_min=args.min, y_max=args.max)
diff --git a/HPOlib/Plotting/plot_util.py b/HPOlib/Plotting/plot_util.py
index 4f148bd4..a20abe45 100644
--- a/HPOlib/Plotting/plot_util.py
+++ b/HPOlib/Plotting/plot_util.py
@@ -22,10 +22,16 @@
import numpy as np
import sys
+import HPOlib.wrapping_util
+
__authors__ = ["Katharina Eggensperger", "Matthias Feurer"]
__contact__ = "automl.org"
+# A super-simple cache for unpickled objects...
+cache = dict()
+
+
def get_plot_markers():
return itertools.cycle(['o', 's', 'x', '^'])
@@ -43,7 +49,39 @@ def get_plot_colors():
"#999999"]) # Grey
-def read_pickles(name_list, pkl_list, cut=sys.maxint):
+def load_pickles(name_list, pkl_list):
+ pickles = dict()
+ for i in range(len(name_list)):
+ key = name_list[i][0]
+ pickles[key] = list()
+
+ for pkl in pkl_list[i]:
+ if cache.get(pkl) is None:
+ fh = open(pkl)
+ pickles[key].append(cPickle.load(fh))
+ fh.close()
+ cache[pkl] = pickles[key][-1]
+ else:
+ pickles[key].append(cache.get(pkl))
+ return pickles
+
+
+def get_best_dict(name_list, pickles, cut=sys.maxint):
+ """
+ Get the best values of many experiments.
+
+ Input
+ * name_list: A list with of tuples of kind (optimizer_name, num_pickles)
+ * pickles: A dictionary with all pickle files for an optimizer_name
+ * cut: How many iterations should be considered
+
+ Returns:
+ * best_dict: A dictionary with the best response value for every optimizer
+ * idx_dict: A dictionary with the number of iterations needed to find the
+ optimum
+ * keys: A list with optimizer names.
+
+ """
best_dict = dict()
idx_dict = dict()
keys = list()
@@ -51,11 +89,8 @@ def read_pickles(name_list, pkl_list, cut=sys.maxint):
keys.append(name_list[i][0])
best_dict[name_list[i][0]] = list()
idx_dict[name_list[i][0]] = list()
- for pkl in pkl_list[i]:
- fh = open(pkl)
- trial = cPickle.load(fh)
- fh.close()
- best, idx = get_best_value_and_index(trial, cut)
+ for pkl in pickles[name_list[i][0]]:
+ best, idx = get_best_value_and_index(pkl, cut)
best_dict[name_list[i][0]].append(best)
idx_dict[name_list[i][0]].append(idx)
return best_dict, idx_dict, keys
@@ -69,7 +104,7 @@ def get_pkl_and_name_list(argument_list):
if not ".pkl" in argument_list[i] and now_data:
raise ValueError("You need at least on .pkl file per Experiment, %s has none" % name_list[-1])
elif not ".pkl" in argument_list[i] and not now_data:
- print "Adding", argument_list[i]
+ # print "Adding", argument_list[i]
name_list.append([argument_list[i], 0])
pkl_list.append(list())
now_data = True
@@ -133,4 +168,23 @@ def get_best_value_and_index(trials, cut=False):
else:
best_value = traj[-1]
best_index = np.argmin(traj)
- return best_value, best_index
\ No newline at end of file
+ return best_value, best_index
+
+
+def get_Trace_cv(trials):
+ trace = list()
+ trials_list = trials['trials']
+ instance_order = trials['instance_order']
+ instance_mean = np.ones([len(trials_list), 1]) * np.inf
+ instance_val = np.ones([len(trials_list), len(trials_list[0]['instance_results'])]) * np.nan
+ for tr_idx, in_idx in instance_order:
+ instance_val[tr_idx, in_idx] = trials_list[tr_idx]['instance_results'][in_idx]
+
+ val = HPOlib.wrapping_util.nan_mean(instance_val[tr_idx, :])
+ if np.isnan(val):
+ val = np.inf
+ instance_mean[tr_idx] = val
+ trace.append(np.min(instance_mean, axis=0)[0])
+ if np.isnan(trace[-1]):
+ del trace[-1]
+ return trace
\ No newline at end of file
diff --git a/HPOlib/Plotting/results.sh b/HPOlib/Plotting/results.sh
index fea13d2a..c793a766 100644
--- a/HPOlib/Plotting/results.sh
+++ b/HPOlib/Plotting/results.sh
@@ -86,9 +86,9 @@ then
directory=`ls | grep "^hyperopt_august2013_mod_${i}_"`
if [ -a "${directory}/hyperopt_august2013_mod.out" ]
then
- num=`cat ${directory}/hyperopt_august2013_mod.out | grep " -----------------------RUNNING RUNSOLVER" | wc -l`
+ num=`cat ${directory}/hyperopt_august2013_mod.out | grep "Result:" | wc -l`
- per=`cat ${directory}/hyperopt_august2013_mod.out | grep "Result for ParamILS:" | sort -r | tail -1`
+ per=`cat ${directory}/hyperopt_august2013_mod.out | grep "Result:" | cut -d" " -f 4 | sort -r -n | tail -1`
per=`echo $per | cut -d' ' -f9`
per=`echo $per | sed 's/,//'`
@@ -114,9 +114,9 @@ then
directory=`ls | grep "^random_hyperopt_august2013_mod_${i}_"`
if [ -a "${directory}/random_hyperopt_august2013_mod.out" ]
then
- num=`cat ${directory}/random_hyperopt_august2013_mod.out | grep " -----------------------RUNNING RUNSOLVER" | wc -l`
+ num=`cat ${directory}/random_hyperopt_august2013_mod.out | grep "Result:" | wc -l`
- per=`cat ${directory}/random_hyperopt_august2013_mod.out | grep "Result for ParamILS:" | sort -r | tail -1`
+ per=`cat ${directory}/random_hyperopt_august2013_mod.out | grep "Result:" | cut -d" " -f 4 | sort -r -n | tail -1`
per=`echo $per | cut -d' ' -f9`
per=`echo $per | sed 's/,//'`
diff --git a/HPOlib/Plotting/statistics.py b/HPOlib/Plotting/statistics.py
index 62b14100..8b6e999a 100644
--- a/HPOlib/Plotting/statistics.py
+++ b/HPOlib/Plotting/statistics.py
@@ -113,7 +113,8 @@ def _mann_whitney_u(x, y=None):
def main(pkl_list, name_list, cut=sys.maxint):
- best_dict, idx_dict, keys = plot_util.read_pickles(name_list, pkl_list,
+ pickles = plot_util.load_pickles(name_list, pkl_list)
+ best_dict, idx_dict, keys = plot_util.get_best_dict(name_list, pickles,
cut=cut)
for k in keys:
diff --git a/HPOlib/__init__.py b/HPOlib/__init__.py
index 64ca9bc7..0b7f71ee 100644
--- a/HPOlib/__init__.py
+++ b/HPOlib/__init__.py
@@ -1,4 +1,4 @@
__authors__ = ["Katharina Eggensperger", "Matthias Feurer"]
__contact__ = "automl.org"
-__version__ = "0.1.0rc1"
+__version__ = "0.1.0"
diff --git a/HPOlib/benchmark_functions.py b/HPOlib/benchmark_functions.py
index 5a2eb531..af81d352 100644
--- a/HPOlib/benchmark_functions.py
+++ b/HPOlib/benchmark_functions.py
@@ -461,7 +461,7 @@ def save_svm_on_grid(params, ret_time=False, **kwargs):
def svm_on_grid(c, alpha, epsilon, ret_time=False):
- # Values for an 6*8*8 grid search which was performed by Hofman et. al.
+ # Values for an 24*14*4 grid search which was performed by Miller et. al.
# Values obtained from Jasper Snoek
# C, alpha, epsilon
# opt_time: return time instead of performance
diff --git a/HPOlib/config_parser/generalDefault.cfg b/HPOlib/config_parser/generalDefault.cfg
index 511fe2e9..9e5d80cd 100644
--- a/HPOlib/config_parser/generalDefault.cfg
+++ b/HPOlib/config_parser/generalDefault.cfg
@@ -1,4 +1,5 @@
[HPOLIB]
+console_output_delay = 1.0
#Will be used for wrapping.py and SMAC
run_instance = runsolver_wrapper.py
use_own_time_measurement = True
@@ -10,6 +11,8 @@ number_of_concurrent_jobs = 1
function_setup =
function_teardown =
+experiment_directory_prefix =
+
#The mem-limit must be expressed in mega-bytes
#The cpu-limit must be expressed in seconds (CPU time)
#The time-limit must be expressed in seconds (wall clock time)
diff --git a/HPOlib/cv.py b/HPOlib/cv.py
index 688e79b1..99964e56 100644
--- a/HPOlib/cv.py
+++ b/HPOlib/cv.py
@@ -195,11 +195,13 @@ def main(*args, **kwargs):
for arg in args:
if type(arg) == dict:
params = arg
+ break
if params is None:
logger.critical("No parameter dict found in cv.py.\n"
"args: %s\n kwargs: %s", args, kwargs)
- sys.exit(1)
+ # TODO: Hack for TPE and AUTOWeka
+ params = args
# Load the experiment to do time-keeping
cv_starttime = time.time()
diff --git a/HPOlib/runsolver_wrapper.py b/HPOlib/runsolver_wrapper.py
index 169ab626..09e8ff78 100644
--- a/HPOlib/runsolver_wrapper.py
+++ b/HPOlib/runsolver_wrapper.py
@@ -52,22 +52,22 @@ def remove_param_metadata(params):
"""
for para in params:
new_name = para
+
+ if isinstance(params[para], str):
+ params[para] = params[para].strip("'")
if "LOG10_" in para:
pos = para.find("LOG10")
new_name = para[0:pos] + para[pos + 6:]
- # new_name = new_name.strip("_")
params[new_name] = np.power(10, float(params[para]))
del params[para]
elif "LOG2" in para:
pos = para.find("LOG2_")
new_name = para[0:pos] + para[pos + 5:]
- # new_name = new_name.strip("_")
params[new_name] = np.power(2, float(params[para]))
del params[para]
elif "LOG_" in para:
pos = para.find("LOG")
new_name = para[0:pos] + para[pos + 4:]
- # new_name = new_name.strip("_")
params[new_name] = np.exp(float(params[para]))
del params[para]
#Check for Q value, returns round(x/q)*q
@@ -75,7 +75,6 @@ def remove_param_metadata(params):
if m is not None:
pos = new_name.find(m.group(0))
tmp = new_name[0:pos] + new_name[pos + len(m.group(0)):]
- #tmp = tmp.strip("_")
q = float(m.group(0)[1:-1])
params[tmp] = round(float(params[new_name]) / q) * q
del params[new_name]
@@ -276,7 +275,7 @@ def parse_output_files(cfg, run_instance_output, runsolver_output_file):
error + " Please have a look at " +
runsolver_output_file)
# It is useful to have the run_instance_output for debugging
- os.remove(run_instance_output)
+ # os.remove(run_instance_output)
return rval
@@ -331,6 +330,8 @@ def main():
experiment.set_one_fold_running(trial_index, fold)
del experiment # release Experiment lock
logger.debug("Calling: %s" % cmd)
+ #sys.stdout.write(cmd + "\n")
+ #sys.stdout.flush()
process = subprocess.Popen(cmd, stdout=fh,
stderr=fh, shell=True, executable="/bin/bash")
diff --git a/HPOlib/wrapping.py b/HPOlib/wrapping.py
index c4bbc886..a173cee6 100644
--- a/HPOlib/wrapping.py
+++ b/HPOlib/wrapping.py
@@ -19,6 +19,7 @@
from argparse import ArgumentParser
import imp
import logging
+import psutil
import os
from Queue import Queue, Empty
import signal
@@ -26,12 +27,13 @@
import subprocess
import sys
from threading import Thread
+import thread
import time
import HPOlib
import HPOlib.check_before_start as check_before_start
import HPOlib.wrapping_util as wrapping_util
-# Experiment is imported after we check for numpy
+# Import experiment only after the check for numpy succeeded
__authors__ = ["Katharina Eggensperger", "Matthias Feurer"]
__contact__ = "automl.org"
@@ -44,6 +46,59 @@
logger = logging.getLogger("HPOlib.wrapping")
+def get_all_p_for_pgid():
+ current_pgid = os.getpgid(os.getpid())
+ pids = psutil.pids()
+ running_pid = []
+ for pid in pids:
+ try:
+ pgid = os.getpgid(pid)
+ except OSError:
+ continue
+
+ # Don't try to kill HPOlib-run
+ if pgid == current_pgid and pid != os.getpid():
+ # This solves the problem that a Zombie process counts
+ # towards the number of process which have to be killed
+ running_pid.append(pid)
+ return running_pid
+
+
+def kill_children(sig):
+ # TODO: somehow wait, until the Experiment pickle is written to disk
+ running_pid = get_all_p_for_pgid()
+
+ logger.critical("Running %s" % str(running_pid))
+ for pid in running_pid:
+ try:
+ os.kill(pid, sig)
+ except Exception as e:
+ logger.error(type(e))
+ logger.error(e)
+
+
+class Exit:
+ def __init__(self):
+ self.exit_flag = False
+ self.signal = None
+
+ def true(self):
+ self.exit_flag = True
+
+ def false(self):
+ self.exit_flag = False
+
+ def set_exit_flag(self, exit):
+ self.exit_flag = exit
+
+ def get_exit(self):
+ return self.exit_flag
+
+ def signal_callback(self, signal, frame):
+ self.true()
+ self.signal = signal
+
+
def calculate_wrapping_overhead(trials):
wrapping_time = 0
for times in zip(trials.cv_starttime, trials.cv_endtime):
@@ -83,6 +138,56 @@ def calculate_optimizer_time(trials):
return np.nansum(optimizer_time)
+def output_experiment_pickle(console_output_delay,
+ printed_start_configuration,
+ printed_end_configuration,
+ optimizer_dir_in_experiment,
+ optimizer, experiment_directory_prefix, lock,
+ Experiment, np, exit):
+ current_best = -1
+ while True:
+ try:
+ trials = Experiment.Experiment(optimizer_dir_in_experiment,
+ experiment_directory_prefix + optimizer)
+ except Exception as e:
+ logger.error(e)
+ time.sleep(console_output_delay)
+ continue
+
+ with lock:
+ for i in range(len(printed_end_configuration), len(trials.instance_order)):
+ configuration = trials.instance_order[i][0]
+ fold = trials.instance_order[i][1]
+ if i + 1 > len(printed_start_configuration):
+ logger.info("Starting configuration %5d, fold %2d",
+ configuration, fold)
+ printed_start_configuration.append(i)
+
+ if np.isfinite(trials.trials[configuration]
+ ["instance_results"][fold]):
+ last_result = trials.trials[configuration] \
+ ["instance_results"][fold]
+ tmp_current_best = trials.get_arg_best()
+ if tmp_current_best <= i:
+ current_best = tmp_current_best
+ # Calculate current best
+ # Check if last result is finite, if not calc nanmean over all instances
+ dct_helper = trials.trials[current_best]
+ res = dct_helper["result"] if \
+ np.isfinite(dct_helper["result"]) \
+ else wrapping_util.nan_mean(dct_helper["instance_results"])
+ #np.nanmean(trials.trials[current_best]["instance_results"])
+ # nanmean does not work for all numpy version
+ logger.info("Result %10f, current best %10f",
+ last_result, res)
+ printed_end_configuration.append(i)
+
+ del trials
+ time.sleep(console_output_delay)
+ if exit:
+ break
+
+
def use_arg_parser():
"""Parse all options which can be handled by the wrapping script.
Unknown arguments are ignored and returned as a list. It is useful to
@@ -178,11 +283,14 @@ def main():
import traceback
logger.critical(traceback.format_exc())
sys.exit(1)
+ experiment_directory_prefix = config.get("HPOLIB", "experiment_directory_prefix")
optimizer_call, optimizer_dir_in_experiment = optimizer_module.main(config=config,
- options=args,
- experiment_dir=experiment_dir)
+ options=args,
+ experiment_dir=experiment_dir,
+ experiment_directory_prefix=experiment_directory_prefix)
cmd = optimizer_call
+ config.set("HPOLIB", "seed", str(args.seed))
with open(os.path.join(optimizer_dir_in_experiment, "config.cfg"), "w") as f:
config.set("HPOLIB", "is_not_original_config_file", "True")
wrapping_util.save_config_to_file(f, config, write_nones=True)
@@ -194,7 +302,9 @@ def main():
except OSError:
pass
folds = config.getint('HPOLIB', 'number_cv_folds')
- trials = Experiment.Experiment(optimizer_dir_in_experiment, optimizer, folds=folds,
+ trials = Experiment.Experiment(optimizer_dir_in_experiment,
+ experiment_directory_prefix + optimizer,
+ folds=folds,
max_wallclock_time=config.get('HPOLIB',
'cpu_limit'),
title=args.title)
@@ -237,18 +347,51 @@ def main():
fn_setup = config.get("HPOLIB", "function_setup")
if fn_setup:
try:
- output = subprocess.check_output(fn_setup, stderr=subprocess.STDOUT)
+ logger.info(fn_setup)
+ fn_setup = shlex.split(fn_setup)
+ output = subprocess.check_output(fn_setup, stderr=subprocess.STDOUT) #,
+ #shell=True, executable="/bin/bash")
+ logger.debug(output)
except subprocess.CalledProcessError as e:
logger.critical(e.output)
sys.exit(1)
+ except OSError as e:
+ logger.critical(e.message)
+ logger.critical(e.filename)
+ sys.exit(1)
logger.info(cmd)
output_file = os.path.join(optimizer_dir_in_experiment, optimizer + ".out")
fh = open(output_file, "a")
cmd = shlex.split(cmd)
print cmd
+
+ # Use a flag which is set to true as soon as all children are
+ # supposed to be killed
+ exit_ = Exit()
+ signal.signal(signal.SIGTERM, exit_.signal_callback)
+ signal.signal(signal.SIGABRT, exit_.signal_callback)
+ signal.signal(signal.SIGINT, exit_.signal_callback)
+ signal.signal(signal.SIGHUP, exit_.signal_callback)
+
+ # Change into the current experiment directory
+ # Some optimizer might expect this
+ dir_before_exp = os.getcwd()
+ os.chdir(optimizer_dir_in_experiment)
+ # See man 7 credentials for the meaning of a process group id
+ # This makes wrapping.py useable with SGEs default behaviour,
+ # where qdel sends a SIGKILL to a whole process group
+ logger.info(os.getpid())
+ os.setpgid(os.getpid(), os.getpid())
+ # TODO: figure out why shell=True was removed in commit f47ac4bb3ffe7f70b795d50c0828ca7e109d2879
+ # maybe it has something todo with the previous behaviour where a
+ # session id was set...
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE, preexec_fn=os.setsid)
+ stderr=subprocess.PIPE)
+
+ global child_process_pid
+ child_process_pid = proc.pid
+
logger.info("-----------------------RUNNING----------------------------------")
# http://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
# How often is the experiment pickle supposed to be opened?
@@ -257,15 +400,17 @@ def main():
else:
optimizer_end_time = sys.float_info.max
- last_output = time.time()
+ console_output_delay = config.getfloat("HPOLIB", "console_output_delay")
+
printed_start_configuration = list()
printed_end_configuration = list()
- current_best = -1
+ sent_SIGINT = False
+ sent_SIGINT_time = np.inf
sent_SIGTERM = False
+ sent_SIGTERM_time = np.inf
sent_SIGKILL = False
- # After the evaluation finished, we scan the experiment pickle twice
- # to print everything!
- minimal_runs_to_go = 2
+ sent_SIGKILL_time = np.inf
+
def enqueue_output(out, queue):
for line in iter(out.readline, b''):
@@ -280,9 +425,26 @@ def enqueue_output(out, queue):
stdout_thread.daemon = True
stderr_thread.start()
stdout_thread.start()
- logger.info('Optimizer runs with PID: %d', proc.pid)
+ if not (args.verbose or args.silent):
+ lock = thread.allocate_lock()
+ thread.start_new_thread(output_experiment_pickle,
+ (console_output_delay,
+ printed_start_configuration,
+ printed_end_configuration,
+ optimizer_dir_in_experiment,
+ optimizer, experiment_directory_prefix,
+ lock, Experiment, np, False))
+ logger.info('Optimizer runs with PID: %d', proc.pid)
+
+ while True:
+ # this implements the total runtime limit
+ if time.time() > optimizer_end_time and not sent_SIGINT:
+ logger.info("Reached total_time_limit, going to shutdown.")
+ exit_.true()
+
+ # necessary, otherwise HPOlib-run takes 100% of one processor
+ time.sleep(0.2)
- while minimal_runs_to_go > 0: # Think of this as a do-while loop...
try:
while True:
line = stdout_queue.get_nowait()
@@ -307,74 +469,68 @@ def enqueue_output(out, queue):
except Empty:
pass
- if time.time() > optimizer_end_time and not sent_SIGTERM:
- os.killpg(proc.pid, signal.SIGTERM)
+ ret = proc.poll()
+
+ running = get_all_p_for_pgid()
+ if ret is not None and len(running) == 0:
+ break
+ # TODO: what happens if we have a ret but something is still
+ # running?
+
+ if exit_.get_exit() and not sent_SIGINT:
+ logger.info("Sending SIGINT")
+ kill_children(signal.SIGINT)
+ sent_SIGINT_time = time.time()
+ sent_SIGINT = True
+
+ if exit_.get_exit() and not sent_SIGTERM and time.time() \
+ > sent_SIGINT_time + 100:
+ logger.info("Sending SIGTERM")
+ kill_children(signal.SIGTERM)
+ sent_SIGTERM_time = time.time()
sent_SIGTERM = True
- if time.time() > optimizer_end_time + 200 and not sent_SIGKILL:
- os.killpg(proc.pid, signal.SIGKILL)
+ if exit_.get_exit() and not sent_SIGKILL and time.time() \
+ > sent_SIGTERM_time + 100:
+ logger.info("Sending SIGKILL")
+ kill_children(signal.SIGKILL)
+ sent_SIGKILL_time = time.time()
sent_SIGKILL = True
- fh.flush()
- # necessary, otherwise HPOlib-run takes 100% of one processor
- time.sleep(0.1)
-
- if not (args.verbose or args.silent) and time.time() - last_output > 1:
- trials = Experiment.Experiment(optimizer_dir_in_experiment,
- optimizer)
-
- for i in range(len(printed_end_configuration), len(trials.instance_order)):
- configuration = trials.instance_order[i][0]
- fold = trials.instance_order[i][1]
- if i + 1 > len(printed_start_configuration):
- logger.info("Starting configuration %5d, fold %2d",
- configuration, fold)
- printed_start_configuration.append(i)
-
- if np.isfinite(trials.trials[configuration]
- ["instance_results"][fold]):
- last_result = trials.trials[configuration] \
- ["instance_results"][fold]
- tmp_current_best = trials.get_arg_best()
- if tmp_current_best <= i:
- current_best = tmp_current_best
- # Calculate current best
- # Check if last result is finite, if not calc nanmean over all instances
- dct_helper = trials.trials[current_best]
- res = dct_helper["result"] if \
- np.isfinite(dct_helper["result"]) \
- else wrapping_util.nan_mean(dct_helper["instance_results"])
- #np.nanmean(trials.trials[current_best]["instance_results"])
- # nanmean does not work for all numpy version
- logger.info("Result %10f, current best %10f",
- last_result, res)
- printed_end_configuration.append(i)
-
- del trials
- last_output = time.time()
-
- if proc.poll() is not None: # the end condition for the
- minimal_runs_to_go -= 1 # do-while loop
-
- elif args.verbose or args.silent:
- if proc.poll() is not None:
- minimal_runs_to_go -= 1
-
ret = proc.returncode
+ del proc
+
+ if not (args.verbose or args.silent):
+ output_experiment_pickle(console_output_delay,
+ printed_start_configuration,
+ printed_end_configuration,
+ optimizer_dir_in_experiment,
+ optimizer, experiment_directory_prefix,
+ lock, Experiment, np, True)
logger.info("-----------------------END--------------------------------------")
fh.close()
+ # Change back into to directory
+ os.chdir(dir_before_exp)
+
# call target_function.teardown()
fn_teardown = config.get("HPOLIB", "function_teardown")
if fn_teardown:
try:
- output = subprocess.check_output(fn_teardown, stderr=subprocess.STDOUT)
+ fn_teardown = shlex.split(fn_teardown)
+ output = subprocess.check_output(fn_teardown, stderr=subprocess.STDOUT) #,
+ #shell=True, executable="/bin/bash")
except subprocess.CalledProcessError as e:
logger.critical(e.output)
sys.exit(1)
+ except OSError as e:
+ logger.critical(e.message)
+ logger.critical(e.filename)
+ sys.exit(1)
- trials = Experiment.Experiment(optimizer_dir_in_experiment, optimizer)
+ trials = Experiment.Experiment(optimizer_dir_in_experiment,
+ experiment_directory_prefix + optimizer)
trials.endtime.append(time.time())
#noinspection PyProtectedMember
trials._save_jobs()
diff --git a/HPOlib/wrapping_util.py b/HPOlib/wrapping_util.py
index 30536998..8c5f1edd 100644
--- a/HPOlib/wrapping_util.py
+++ b/HPOlib/wrapping_util.py
@@ -27,6 +27,7 @@
import os
from StringIO import StringIO
import sys
+import types
import config_parser.parse as parse
@@ -108,7 +109,8 @@ def load_experiment_config_file():
if not config.has_option("HPOLIB", "is_not_original_config_file"):
logger.critical("Config file in directory %s seems to be an"
" original config which was not created by wrapping.py. "
- "Please contact the HPOlib maintainer to solve this issue.")
+ "Are you sure that you are in the right directory?" %
+ os.getcwd())
sys.exit(1)
return config
except IOError as e:
@@ -207,7 +209,8 @@ def parse_config_values_from_unknown_arguments(unknown_arguments, config):
config: A ConfigParser.SafeConfigParser object which contains all keys
should be parsed from the unknown_arguments list.
Returns:
- an argparse.Namespace object containing the parsed values.
+ an argparse.Namespace object containing the parsed values. These are
+ packed inside a python list or None if not present.
Raises:
an error if an argument from unknown_arguments is not a key in config
"""
@@ -219,7 +222,7 @@ def parse_config_values_from_unknown_arguments(unknown_arguments, config):
parser = ArgumentParser()
for argument in further_possible_command_line_arguments:
- parser.add_argument(argument)
+ parser.add_argument(argument, nargs="+")
return parser.parse_args(unknown_arguments)
@@ -230,7 +233,10 @@ def config_with_cli_arguments(config, config_overrides):
for key in config.options(section):
cli_key = "%s:%s" % (section, key)
if cli_key in arg_dict:
- config.set(section, key, arg_dict[cli_key])
+ value = arg_dict[cli_key]
+ if value is not None and not isinstance(value, types.StringTypes):
+ value = " ".join(value)
+ config.set(section, key, value)
else:
config.remove_option(section, key)
return config
diff --git a/INSTALL.md b/INSTALL.md
index 212f6492..e385c84c 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -80,7 +80,7 @@ ls optimizers/smac
```bash
cd benchmarks/branin
-HPOlib-run -o ../../optimizers/smac/sma -s 23
+HPOlib-run -o ../../optimizers/smac/smac -s 23
```
This takes depending on your machine ~2 minutes. You can now plot the results of your first experiment:
@@ -92,17 +92,20 @@ HPOlib-plot FIRSTRUN smac_2_06_01-dev_23_*/smac_*.pkl -s `pwd`/Plots/
You can test the other optimizers (spearmint will take quite longer 30min):
```bash
- HPOlib-run -o ../../optimizers/tpe/h -s 23
- HPOlib-run -o ../../optimizers/spearmint/s -s 23
+HPOlib-run -o ../../optimizers/tpe/h -s 23
+HPOlib-run -o ../../optimizers/spearmint/spearmint_april2013 -s 23
```
and again:
```bash
- HPOlib-plot SMAC smac_2_06_01-dev_23_*/smac_*.pkl TPE hyperopt_august2013_mod_23_*/hyp*.pkl SPEARMINT spearmint_april2013_mod_23_*/spear*.pkl -s `pwd`/Plots/
+HPOlib-plot SMAC smac_2_06_01-dev_23_*/smac_*.pkl TPE hyperopt_august2013_mod_23_*/hyp*.pkl SPEARMINT spearmint_april2013_mod_23_*/spear*.pkl -s `pwd`/Plots/
+```
+
and to check the general performance on this super complex benchmark:
- HPOlib-plot BRANIN smac_2_06_01-dev_23_*/smac_*.pkl hyperopt_august2013_mod_23_*/hyp*.pkl spearmint_april2013_mod_23_*/spear*.pkl -s `pwd`/Plots/
+```
+HPOlib-plot BRANIN smac_2_06_01-dev_23_*/smac_*.pkl hyperopt_august2013_mod_23_*/hyp*.pkl spearmint_april2013_mod_23_*/spear*.pkl -s `pwd`/Plots/
```
Using without installation
@@ -112,21 +115,21 @@ Using without installation
If you decide to not install HPOlib, you need to download the optimizer code by yourself
```bash
- cd optimizers
- wget http://www.automl.org/hyperopt_august2013_mod_src.tar.gz
- wget http://www.automl.org/smac_2_06_01-dev_src.tar.gz
- wget http://www.automl.org/spearmint_april2013_mod_src.tar.gz
+cd optimizers
+wget http://www.automl.org/hyperopt_august2013_mod_src.tar.gz
+wget http://www.automl.org/smac_2_06_01-dev_src.tar.gz
+wget http://www.automl.org/spearmint_april2013_mod_src.tar.gz
- tar -xf hyperopt_august2013_mod_src.tar.gz
- mv hyperopt_august2013_mod_src tpe/
+tar -xf hyperopt_august2013_mod_src.tar.gz
+mv hyperopt_august2013_mod_src tpe/
- tar -xf smac_2_06_01-dev_src.tar.gz
- mv smac_2_06_01-dev_src.tar.gz smac/
+tar -xf smac_2_06_01-dev_src.tar.gz
+mv smac_2_06_01-dev_src smac/
- tar -xf spearmint_april2013_mod_src.tar.gz
- mv spearmint_april2013_mod_src spearmint/
+tar -xf spearmint_april2013_mod_src.tar.gz
+mv spearmint_april2013_mod_src spearmint/
- cd ../
+cd ../
```
And you need to install all requirements:
@@ -147,25 +150,25 @@ e.g. with
Also you need the runsolver
```bash
- wget http://www.cril.univ-artois.fr/~roussel/runsolver/runsolver-3.3.2.tar.bz2
- tar -xf runsolver-3.3.2.tar.bz2
- cd runsolver/src
+wget http://www.cril.univ-artois.fr/~roussel/runsolver/runsolver-3.3.2.tar.bz2
+tar -xf runsolver-3.3.2.tar.bz2
+cd runsolver/src
make
```
as this might not work, you can change the makefile via
```bash
- sed -i 's/\/usr\/include\/asm\/unistd/\/usr\/include\/unistd/g' ./Makefile
- make
+sed -i 's/\/usr\/include\/asm\/unistd/\/usr\/include\/unistd/g' ./Makefile
+make
```
then you need to add runsolver (and HPOlib) to your PATH (PYTHONPATH):
```bash
- cd ../../
- export PATH=$PATH:/path/to/runsolver/src/
- export PYTHONPATH=$PYTHONPATH:`pwd`
+cd ../../
+export PATH=$PATH:/path/to/runsolver/src/
+export PYTHONPATH=$PYTHONPATH:`pwd`
```
then you can run a benchmark like in step 5.) from installing with setup.py with replacing
diff --git a/benchmarks/branin/config.cfg b/benchmarks/branin/config.cfg
index 4a7dbc05..c14fa870 100644
--- a/benchmarks/branin/config.cfg
+++ b/benchmarks/branin/config.cfg
@@ -1,5 +1,5 @@
[SMAC]
-p = smac_2_06_01-dev/params.pcs
+p = params.pcs
[TPE]
space = space.py
diff --git a/optimizers/smac/smac_2_06_01-dev.py b/optimizers/smac/smac_2_06_01-dev.py
index 2b038d55..1df77c8c 100644
--- a/optimizers/smac/smac_2_06_01-dev.py
+++ b/optimizers/smac/smac_2_06_01-dev.py
@@ -92,7 +92,7 @@ def build_smac_call(config, options, optimizer_dir):
'--algoExec', '"python', os.path.join(algo_exec_dir,
config.get('SMAC', 'algo_exec')) + '"',
'--execDir', optimizer_dir,
- '-p', config.get('SMAC', 'p'),
+ '-p', os.path.join(optimizer_dir, os.path.basename(config.get('SMAC', 'p'))),
# The experiment dir MUST not be specified when restarting, it is set
# further down in the code
# '--experimentDir', optimizer_dir,
@@ -196,7 +196,7 @@ def restore(config, optimizer_dir, **kwargs):
#noinspection PyUnusedLocal
-def main(config, options, experiment_dir, **kwargs):
+def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
# config: Loaded .cfg file
# options: Options containing seed, restore_dir,
# experiment_dir: Experiment directory/Benchmark_directory
@@ -211,7 +211,9 @@ def main(config, options, experiment_dir, **kwargs):
raise Exception("The restore directory does not exist")
optimizer_dir = options.restore
else:
- optimizer_dir = os.path.join(experiment_dir, optimizer_str + "_" +
+ optimizer_dir = os.path.join(experiment_dir,
+ experiment_directory_prefix
+ + optimizer_str + "_" +
str(options.seed) + "_" + time_string)
# Build call
cmd = build_smac_call(config, options, optimizer_dir)
@@ -221,7 +223,20 @@ def main(config, options, experiment_dir, **kwargs):
os.mkdir(optimizer_dir)
# TODO: This can cause huge problems when the files are located
# somewhere else?
- params = os.path.split(config.get('SMAC', "p"))[1]
+ space = config.get('SMAC', "p")
+ abs_space = os.path.abspath(space)
+ parent_space = os.path.join(experiment_dir, optimizer_str, space)
+ if os.path.exists(abs_space):
+ space = abs_space
+ elif os.path.exists(parent_space):
+ space = parent_space
+ else:
+ raise Exception("SMAC search space not found. Searched at %s and "
+ "%s" % (abs_space, parent_space))
+
+ if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))):
+ os.symlink(os.path.join(experiment_dir, optimizer_str, space),
+ os.path.join(optimizer_dir, os.path.basename(space)))
# Copy the smac search space and create the instance information
fh = open(os.path.join(optimizer_dir, 'train.txt'), "w")
@@ -236,10 +251,7 @@ def main(config, options, experiment_dir, **kwargs):
fh = open(os.path.join(optimizer_dir, "scenario.txt"), "w")
fh.close()
-
- if not os.path.exists(os.path.join(optimizer_dir, params)):
- os.symlink(os.path.join(experiment_dir, optimizer_str, params),
- os.path.join(optimizer_dir, params))
+
logger.info("### INFORMATION ################################################################")
logger.info("# You're running %40s #" % config.get('SMAC', 'path_to_optimizer'))
for v in version_info:
diff --git a/optimizers/smac/smac_2_06_01-devDefault.cfg b/optimizers/smac/smac_2_06_01-devDefault.cfg
index c0d70c79..605cc3f0 100644
--- a/optimizers/smac/smac_2_06_01-devDefault.cfg
+++ b/optimizers/smac/smac_2_06_01-devDefault.cfg
@@ -3,21 +3,9 @@
handles_cv=1
[SMAC]
-# cutoff_time = runsolver_time_limit + 100 sec
-# Set otherwise
-# algo_exec = %(run_instance)s
-# num_concurrent_algo_execs = %(number_of_concurrent_jobs)s
-# experimentDir = %(exp_directory)s/
-# outputDirectory = %(exp_directory)s/smac/output/
-# total_num_runs_limit = %(number_of_jobs)s * %(cv)s
-
-num_run = 0
intra_instance_obj = MEAN
run_obj = QUALITY
-#instance_file = train.txt
-#test_instance_file = test.txt
-#execDir = ./
p = smac_2_06_01-dev/params.pcs
rf_full_tree_bootstrap = False
rf_split_min = 10
@@ -25,8 +13,6 @@ rf_split_min = 10
adaptive_capping = false
max_incumbent_runs = 2000
num_iterations = 2147483647
-# No one actually cares about it...
-runtime_limit = 2147483647
deterministic = True
retry_target_algorithm_run_count = 0
diff --git a/optimizers/spearmint/spearmint_april2013_mod.py b/optimizers/spearmint/spearmint_april2013_mod.py
index cc52564d..81e79d2c 100644
--- a/optimizers/spearmint/spearmint_april2013_mod.py
+++ b/optimizers/spearmint/spearmint_april2013_mod.py
@@ -60,7 +60,7 @@ def build_spearmint_call(config, options, optimizer_dir):
print
call = 'python ' + os.path.join(config.get('SPEARMINT', 'path_to_optimizer'), 'spearmint_sync.py')
call = ' '.join([call, optimizer_dir,
- '--config', config.get('SPEARMINT', 'config'),
+ '--config', os.path.join(optimizer_dir, os.path.basename(config.get('SPEARMINT', 'config'))),
'--max-concurrent', config.get('HPOLIB', 'number_of_concurrent_jobs'),
'--max-finished-jobs', config.get('SPEARMINT', 'max_finished_jobs'),
'--polling-time', config.get('SPEARMINT', 'spearmint_polling_time'),
@@ -103,7 +103,7 @@ def restore(config, optimizer_dir, **kwargs):
#noinspection PyUnusedLocal
-def main(config, options, experiment_dir, **kwargs):
+def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
# config: Loaded .cfg file
# options: Options containing seed, restore_dir,
# experiment_dir: Experiment directory/Benchmark_directory
@@ -118,7 +118,9 @@ def main(config, options, experiment_dir, **kwargs):
raise Exception("The restore directory does not exist")
optimizer_dir = options.restore
else:
- optimizer_dir = os.path.join(experiment_dir, optimizer_str + "_" +
+ optimizer_dir = os.path.join(experiment_dir,
+ experiment_directory_prefix
+ + optimizer_str + "_" +
str(options.seed) + "_" + time_string)
# Build call
@@ -128,10 +130,20 @@ def main(config, options, experiment_dir, **kwargs):
if not os.path.exists(optimizer_dir):
os.mkdir(optimizer_dir)
# Make a link to the Protocol-Buffer config file
- configpb = config.get('SPEARMINT', 'config')
- if not os.path.exists(os.path.join(optimizer_dir, configpb)):
- os.symlink(os.path.join(experiment_dir, optimizer_str, configpb),
- os.path.join(optimizer_dir, configpb))
+ space = config.get('SPEARMINT', 'config')
+ abs_space = os.path.abspath(space)
+ parent_space = os.path.join(experiment_dir, optimizer_str, space)
+ if os.path.exists(abs_space):
+ space = abs_space
+ elif os.path.exists(parent_space):
+ space = parent_space
+ else:
+ raise Exception("Spearmint search space not found. Searched at %s and "
+ "%s" % (abs_space, parent_space))
+ # Copy the hyperopt search space
+ if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))):
+ os.symlink(os.path.join(experiment_dir, optimizer_str, space),
+ os.path.join(optimizer_dir, os.path.basename(space)))
logger.info("### INFORMATION ################################################################")
logger.info("# You're running %40s #" % path_to_optimizer)
logger.info("%s" % version_info)
diff --git a/optimizers/spearmint/spearmint_april2013_modDefault.cfg b/optimizers/spearmint/spearmint_april2013_modDefault.cfg
index eed72c5a..505e8970 100644
--- a/optimizers/spearmint/spearmint_april2013_modDefault.cfg
+++ b/optimizers/spearmint/spearmint_april2013_modDefault.cfg
@@ -1,24 +1,15 @@
[SPEARMINT]
-#Which spearmint script to call
-script = python spearmint_sync.py
# Which chooser to take (string)
method = GPEIOptChooser
#Arguments to pass to chooser module
method_args =
#Number of experiments in initial grid
grid_size = 20000
-#The seed used to initialize initial grid (int)
-grid_seed = 1
#Does not need to be specified, when experiment directory with a 'config.pb' is
#present. For Syntax see 'config.bp' in braninpy directory
config = config.pb
-#Run in job_wrapper mode (bool)
-wrapper = 0
#The time in_between successive polls for results (float)
spearmint_polling_time = 3.0
-#General Parameters
-max_concurrent = 1
-#max_finished_jobs = %(number_of_jobs)s
# either relative to __file__ or absolute
path_to_optimizer = ./spearmint_april2013_mod_src
diff --git a/optimizers/spearmint/spearmint_gitfork_mod.py b/optimizers/spearmint/spearmint_gitfork_mod.py
new file mode 100644
index 00000000..7b7592ef
--- /dev/null
+++ b/optimizers/spearmint/spearmint_gitfork_mod.py
@@ -0,0 +1,127 @@
+##
+# wrapping: A program making it easy to use hyperparameter
+# optimization software.
+# Copyright (C) 2013 Katharina Eggensperger and Matthias Feurer
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import cPickle
+import logging
+import os
+import sys
+
+import numpy as np
+
+import HPOlib.wrapping_util
+
+
+__authors__ = ["Katharina Eggensperger", "Matthias Feurer"]
+__contact__ = "automl.org"
+
+
+logger = logging.getLogger("HPOlib.spearmint_gitfork_mod")
+
+
+path_to_optimizer = "optimizers/spearmint_march2014_mod/"
+version_info = ("# %76s #\n" % "git version march 2014")
+
+
+def check_dependencies():
+ try:
+ import google.protobuf
+ try:
+ from google.protobuf.internal import enum_type_wrapper
+ except ImportError:
+ raise ImportError("Installed google.protobuf version is too old, you need at least 2.5.0")
+ except ImportError:
+ raise ImportError("Google protobuf cannot be imported. Are you sure "
+ "it's installed?")
+ try:
+ import numpy
+ except ImportError:
+ raise ImportError("Numpy cannot be imported. Are you sure that it's"
+ " installed?")
+ try:
+ import scipy
+ except ImportError:
+ raise ImportError("Scipy cannot be imported. Are you sure that it's"
+ " installed?")
+
+
+def build_spearmint_call(config, options, optimizer_dir):
+ os.environ['PYTHONPATH'] = os.path.join(config.get('SPEARMINT', 'path_to_optimizer'), 'spearmint') + \
+ os.pathsep + os.environ['PYTHONPATH']
+ print os.environ['PYTHONPATH']
+ call = 'python ' + \
+ os.path.join(config.get('SPEARMINT', 'path_to_optimizer'), 'spearmint', 'spearmint', 'main.py')
+ call = ' '.join([call, os.path.join(optimizer_dir, config.get('SPEARMINT', 'config')),
+ '--driver=local',
+ '--max-concurrent', config.get('HPOLIB', 'number_of_concurrent_jobs'),
+ '--max-finished-jobs', config.get('SPEARMINT', 'max_finished_jobs'),
+ '--polling-time', config.get('SPEARMINT', 'spearmint_polling_time'),
+ '--grid-size', config.get('SPEARMINT', 'grid_size'),
+ '--method', config.get('SPEARMINT', 'method'),
+ '--method-args=' + config.get('SPEARMINT', 'method_args'),
+ '--grid-seed', str(options.seed)])
+ if config.get('SPEARMINT', 'method') != "GPEIChooser" and \
+ config.get('SPEARMINT', 'method') != "GPEIOptChooser":
+ logger.warning('WARNING: This chooser might not work yet\n')
+ call = ' '.join([call, config.get("SPEARMINT", 'method_args')])
+ return call
+
+
+#noinspection PyUnusedLocal
+def restore(config, optimizer_dir, **kwargs):
+ raise NotImplementedError("Restoring is not possible for this optimizer")
+
+
+#noinspection PyUnusedLocal
+def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
+ # config: Loaded .cfg file
+ # options: Options containing seed, restore_dir,
+ # experiment_dir: Experiment directory/Benchmark_directory
+ # **kwargs: Nothing so far
+
+ time_string = HPOlib.wrapping_util.get_time_string()
+ optimizer_str = os.path.splitext(os.path.basename(__file__))[0]
+
+ # Find experiment directory
+ if options.restore:
+ if not os.path.exists(options.restore):
+ raise Exception("The restore directory does not exist")
+ optimizer_dir = options.restore
+ else:
+ optimizer_dir = os.path.join(experiment_dir,
+ experiment_directory_prefix +
+ optimizer_str + "_" +
+ str(options.seed) + "_" + time_string)
+
+ # Build call
+ cmd = build_spearmint_call(config, options, optimizer_dir)
+
+ # Set up experiment directory
+ if not os.path.exists(optimizer_dir):
+ os.mkdir(optimizer_dir)
+ # Make a link to the Protocol-Buffer config file
+ configpb = config.get('SPEARMINT', 'config')
+ if not os.path.exists(os.path.join(optimizer_dir, configpb)):
+ os.symlink(os.path.join(experiment_dir, optimizer_str, configpb),
+ os.path.join(optimizer_dir, configpb))
+ logger.info("### INFORMATION ################################################################")
+ logger.info("# You're running %40s #" % path_to_optimizer)
+ logger.info("%s" % version_info)
+ logger.info("# A newer version might be available, but not yet built in. #")
+ logger.info("# Please use this version only to reproduce our results on automl.org #")
+ logger.info("################################################################################")
+ return cmd, optimizer_dir
\ No newline at end of file
diff --git a/optimizers/spearmint/spearmint_gitfork_modDefault.cfg b/optimizers/spearmint/spearmint_gitfork_modDefault.cfg
new file mode 100644
index 00000000..85b8ef07
--- /dev/null
+++ b/optimizers/spearmint/spearmint_gitfork_modDefault.cfg
@@ -0,0 +1,24 @@
+[SPEARMINT]
+#Which spearmint script to call
+script = python spearmint_sync.py
+# Which chooser to take (string)
+method = GPEIOptChooser
+#Arguments to pass to chooser module
+method_args =
+#Number of experiments in initial grid
+grid_size = 20000
+#The seed used to initialize initial grid (int)
+grid_seed = 1
+#Does not need to be specified, when experiment directory with a 'config.pb' is
+#present. For Syntax see 'config.pb' in braninpy directory
+config = config.pb
+#Run in job_wrapper mode (bool)
+wrapper = 0
+#The time in_between successive polls for results (float)
+spearmint_polling_time = 3.0
+#General Parameters
+max_concurrent = 1
+#max_finished_jobs = %(number_of_jobs)s
+
+# either relative to __file__ or absolute
+path_to_optimizer = ./spearmint_gitfork_mod_src
diff --git a/optimizers/spearmint/spearmint_gitfork_mod_parser.py b/optimizers/spearmint/spearmint_gitfork_mod_parser.py
new file mode 100644
index 00000000..85df4d02
--- /dev/null
+++ b/optimizers/spearmint/spearmint_gitfork_mod_parser.py
@@ -0,0 +1,49 @@
+##
+# wrapping: A program making it easy to use hyperparameter
+# optimization software.
+# Copyright (C) 2013 Katharina Eggensperger and Matthias Feurer
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import logging
+import os
+import sys
+
+logger = logging.getLogger("HPOlib.optimizers.spearmint.spearmint_gitfork_mod_parser")
+
+
+def manipulate_config(config):
+ # special cases
+ if not config.has_option('SPEARMINT', 'method'):
+ raise Exception("SPEARMINT:method not specified in .cfg")
+ if not config.has_option('SPEARMINT', 'method_args'):
+ raise Exception("SPEARMINT:method-args not specified in .cfg")
+
+ # GENERAL
+ if not config.has_option('SPEARMINT', 'max_finished_jobs'):
+ config.set('SPEARMINT', 'max_finished_jobs',
+ config.get('HPOLIB', 'number_of_jobs'))
+
+ path_to_optimizer = config.get('SPEARMINT', 'path_to_optimizer')
+ if not os.path.isabs(path_to_optimizer):
+ path_to_optimizer = os.path.join(os.path.dirname(os.path.realpath(__file__)), path_to_optimizer)
+
+ path_to_optimizer = os.path.normpath(path_to_optimizer)
+ if not os.path.exists(path_to_optimizer):
+ logger.critical("Path to optimizer not found: %s" % path_to_optimizer)
+ sys.exit(1)
+
+ config.set('SPEARMINT', 'path_to_optimizer', path_to_optimizer)
+
+ return config
diff --git a/optimizers/spearmint/spearmint_gitfork_mod_src b/optimizers/spearmint/spearmint_gitfork_mod_src
new file mode 160000
index 00000000..365464bc
--- /dev/null
+++ b/optimizers/spearmint/spearmint_gitfork_mod_src
@@ -0,0 +1 @@
+Subproject commit 365464bc602278f22e3a195be5fd10eff253b8db
diff --git a/optimizers/tpe/hyperopt_august2013_mod.py b/optimizers/tpe/hyperopt_august2013_mod.py
index bc5ee359..241838d0 100644
--- a/optimizers/tpe/hyperopt_august2013_mod.py
+++ b/optimizers/tpe/hyperopt_august2013_mod.py
@@ -69,7 +69,7 @@ def build_tpe_call(config, options, optimizer_dir):
# For TPE we have to cd to the exp_dir
call = "python " + os.path.dirname(os.path.realpath(__file__)) + \
"/tpecall.py"
- call = ' '.join([call, '-p', config.get('TPE', 'space'),
+ call = ' '.join([call, '-p', os.path.join(optimizer_dir, os.path.basename(config.get('TPE', 'space'))),
"-m", config.get('TPE', 'number_evals'),
"-s", str(options.seed),
"--cwd", optimizer_dir])
@@ -105,7 +105,7 @@ def restore(config, optimizer_dir, **kwargs):
#noinspection PyUnusedLocal
-def main(config, options, experiment_dir, **kwargs):
+def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
# config: Loaded .cfg file
# options: Options containing seed, restore_dir,
# experiment_dir: Experiment directory/Benchmark_directory
@@ -144,7 +144,9 @@ def main(config, options, experiment_dir, **kwargs):
raise Exception("The restore directory does not exist")
optimizer_dir = options.restore
else:
- optimizer_dir = os.path.join(experiment_dir, optimizer_str + "_" +
+ optimizer_dir = os.path.join(experiment_dir,
+ experiment_directory_prefix
+ + optimizer_str + "_" +
str(options.seed) + "_" +
time_string)
@@ -155,10 +157,19 @@ def main(config, options, experiment_dir, **kwargs):
if not os.path.exists(optimizer_dir):
os.mkdir(optimizer_dir)
space = config.get('TPE', 'space')
+ abs_space = os.path.abspath(space)
+ parent_space = os.path.join(experiment_dir, optimizer_str, space)
+ if os.path.exists(abs_space):
+ space = abs_space
+ elif os.path.exists(parent_space):
+ space = parent_space
+ else:
+ raise Exception("TPE search space not found. Searched at %s and "
+ "%s" % (abs_space, parent_space))
# Copy the hyperopt search space
- if not os.path.exists(os.path.join(optimizer_dir, space)):
+ if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))):
os.symlink(os.path.join(experiment_dir, optimizer_str, space),
- os.path.join(optimizer_dir, space))
+ os.path.join(optimizer_dir, os.path.basename(space)))
import hyperopt
path_to_loaded_optimizer = os.path.abspath(os.path.dirname(os.path.dirname(hyperopt.__file__)))
diff --git a/optimizers/tpe/hyperopt_august2013_modDefault.cfg b/optimizers/tpe/hyperopt_august2013_modDefault.cfg
index 09cde416..b7a5e8ee 100644
--- a/optimizers/tpe/hyperopt_august2013_modDefault.cfg
+++ b/optimizers/tpe/hyperopt_august2013_modDefault.cfg
@@ -1,7 +1,5 @@
[TPE]
space = space.py
-#exp_dir = %(exp_directory)s/tpe/
-#number_evals = %(number_of_jobs)s
# either relative to __file__ or absolute
path_to_optimizer = ./hyperopt_august2013_mod_src
\ No newline at end of file
diff --git a/optimizers/tpe/random_hyperopt_august2013_mod.py b/optimizers/tpe/random_hyperopt_august2013_mod.py
index 1d35ebcf..f5a16bca 100644
--- a/optimizers/tpe/random_hyperopt_august2013_mod.py
+++ b/optimizers/tpe/random_hyperopt_august2013_mod.py
@@ -73,7 +73,7 @@ def check_dependencies():
def build_random_call(config, options, optimizer_dir):
call = "python " + os.path.dirname(os.path.realpath(__file__)) + \
"/tpecall.py"
- call = ' '.join([call, '-p', config.get('TPE', 'space'),
+ call = ' '.join([call, '-p', os.path.join(optimizer_dir, os.path.basename(config.get('TPE', 'space'))),
"-m", config.get('TPE', 'number_evals'),
"-s", str(options.seed),
"--cwd", optimizer_dir, "--random"])
@@ -104,7 +104,7 @@ def restore(config, optimizer_dir, **kwargs):
# noinspection PyUnusedLocal
-def main(config, options, experiment_dir, **kwargs):
+def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
# config: Loaded .cfg file
# options: Options containing seed, restore,
# experiment_dir: Experiment directory/Benchmarkdirectory
@@ -126,7 +126,9 @@ def main(config, options, experiment_dir, **kwargs):
raise Exception("The restore directory does not exist")
optimizer_dir = options.restore
else:
- optimizer_dir = os.path.join(experiment_dir, optimizer_str + "_" +
+ optimizer_dir = os.path.join(experiment_dir,
+ experiment_directory_prefix
+ + optimizer_str + "_" +
str(options.seed) + "_" +
time_string)
@@ -137,10 +139,19 @@ def main(config, options, experiment_dir, **kwargs):
if not os.path.exists(optimizer_dir):
os.mkdir(optimizer_dir)
space = config.get('TPE', 'space')
+ abs_space = os.path.abspath(space)
+ parent_space = os.path.join(experiment_dir, optimizer_str, space)
+ if os.path.exists(abs_space):
+ space = abs_space
+ elif os.path.exists(parent_space):
+ space = parent_space
+ else:
+ raise Exception("TPE search space not found. Searched at %s and "
+ "%s" % (abs_space, parent_space))
# Copy the hyperopt search space
- if not os.path.exists(os.path.join(optimizer_dir, space)):
+ if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))):
os.symlink(os.path.join(experiment_dir, optimizer_str, space),
- os.path.join(optimizer_dir, space))
+ os.path.join(optimizer_dir, os.path.basename(space)))
import hyperopt
path_to_loaded_optimizer = os.path.abspath(os.path.dirname(os.path.dirname(hyperopt.__file__)))
diff --git a/scripts/HPOlib-convert b/scripts/HPOlib-convert
old mode 100644
new mode 100755
diff --git a/scripts/HPOlib-plot b/scripts/HPOlib-plot
old mode 100644
new mode 100755
diff --git a/setup.py b/setup.py
index 13a9f8e2..982fc248 100644
--- a/setup.py
+++ b/setup.py
@@ -251,7 +251,8 @@ def run(self):
'networkx',
'protobuf',
'scipy>=0.13.2',
- 'pymongo'
+ 'pymongo',
+ 'psutil'
],
author_email='eggenspk@informatik.uni-freiburg.de',
description=desc,
diff --git a/tests/unittests/test_wrapping_util.py b/tests/unittests/test_wrapping_util.py
index 2d927149..fd2b2517 100644
--- a/tests/unittests/test_wrapping_util.py
+++ b/tests/unittests/test_wrapping_util.py
@@ -76,7 +76,9 @@ def test_save_config_to_file_ignore_none(self):
self.assertEqual(asserted_file_content, file_content)
string_stream.close()
- def test_use_option_parser_with_config(self):
+ def test_parse_config_values_from_unknown_arguments(self):
+ """Test if we can convert a config with Sections and variables into an
+ argparser."""
sys.argv = ['wrapping.py', '-s', '1', '-t', 'DBNet', '-o', 'SMAC',
'--HPOLIB:number_of_jobs', '2']
args, unknown = wrapping.use_arg_parser()
@@ -85,7 +87,26 @@ def test_use_option_parser_with_config(self):
config.read("dummy_config.cfg")
config_args = wrapping_util.parse_config_values_from_unknown_arguments(
unknown, config)
- self.assertEqual(vars(config_args)['HPOLIB:number_of_jobs'], '2')
+ self.assertListEqual(vars(config_args)['HPOLIB:number_of_jobs'], ['2'])
+ self.assertIs(vars(config_args)['GRIDSEARCH:params'], None)
+ self.assertIs(vars(config_args)['HPOLIB:function'], None)
+ self.assertIs(vars(config_args)['HPOLIB:result_on_terminate'], None)
+
+ def test_parse_config_values_from_unknown_arguments2(self):
+ """Test if we can convert a config with Sections and variables into an
+ argparser. Test for arguments with whitespaces"""
+ sys.argv = ['wrapping.py', '-s', '1', '-t', 'DBNet', '-o', 'SMAC',
+ '--HPOLIB:function', 'python', '../branin.py']
+ args, unknown = wrapping.use_arg_parser()
+ self.assertEqual(len(unknown), 3)
+ config = ConfigParser.SafeConfigParser(allow_no_value=True)
+ config.read("dummy_config.cfg")
+ config_args = wrapping_util.parse_config_values_from_unknown_arguments(
+ unknown, config)
+ self.assertListEqual(vars(config_args)['HPOLIB:function'], ['python',
+ '../branin.py'])
+ self.assertIs(vars(config_args)['GRIDSEARCH:params'], None)
+ self.assertIs(vars(config_args)['HPOLIB:result_on_terminate'], None)
def test_nan_mean(self):
self.assertEqual(wrapping_util.nan_mean(np.array([1, 5])), 3)