diff --git a/.gitignore b/.gitignore index 01ff82ea..b7483858 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,7 @@ *.py[cod] # Optimizers -HPOlib/optimizers/smac_2_06_01-dev/* -HPOlib/optimizers/spearmint_april2013_mod/* -HPOlib/optimizers/hyperopt_august2013_mod/* +*_src* # Runsolver runsolver/src/* diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..8c18b4b8 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "optimizers/spearmint/spearmint_gitfork_mod_src"] + path = optimizers/spearmint/spearmint_gitfork_mod_src + url = https://github.com/automl/spearmint.git diff --git a/HPOlib/Experiment.py b/HPOlib/Experiment.py index 0c64b64e..2118682c 100644 --- a/HPOlib/Experiment.py +++ b/HPOlib/Experiment.py @@ -19,8 +19,6 @@ import cPickle import logging import os -import scipy -from scipy.stats.distributions import wrapcauchy_gen import sys import tempfile diff --git a/HPOlib/Locker.py b/HPOlib/Locker.py index e80b198a..3e3aadee 100755 --- a/HPOlib/Locker.py +++ b/HPOlib/Locker.py @@ -61,7 +61,7 @@ def unlock(self, filename): if self.locks[filename] == 1: success = safe_delete('%s.lock' % (filename)) if not success: - logger.log("Could not unlock file: %s.\n", filename) + logger.error("Could not unlock file: %s.\n", filename) del self.locks[filename] return success else: diff --git a/HPOlib/Plotting/doAllPlots.py b/HPOlib/Plotting/doAllPlots.py index 42132d2b..f5049105 100644 --- a/HPOlib/Plotting/doAllPlots.py +++ b/HPOlib/Plotting/doAllPlots.py @@ -227,23 +227,16 @@ def main(): _box_whisker(pkl_list=pkl_list, name_list=name_list, save=tmp_save, log=log, cut=args.cut) - # statistics - if save_dir is not "": - tmp_save = os.path.join(save_dir, "statistics_%s.txt" % time_str) - else: - tmp_save = save_dir - sys.stdout.write("statistics.py ... %s ..." % tmp_save) - _statistics(pkl_list=pkl_list, name_list=name_list, save=tmp_save, - log=log, cut=args.cut) - # LaTeX table if save_dir is not "": tmp_save = os.path.join(save_dir, "table_%s.tex" % time_str) else: tmp_save = save_dir sys.stdout.write("generateTexTable.py ... %s ..." % tmp_save) - _generate_tex_table(pkl_list=pkl_list, name_list=name_list, + ret = _generate_tex_table(pkl_list=pkl_list, name_list=name_list, save=tmp_save, log=log, cut=args.cut) + if ret is not None: + print ret # We can always plot this @@ -256,6 +249,15 @@ def main(): _optimizer_overhead(pkl_list=pkl_list, name_list=name_list, save=tmp_save, log=log, cut=args.cut) + # statistics + if save_dir is not "": + tmp_save = os.path.join(save_dir, "statistics_%s.txt" % time_str) + else: + tmp_save = save_dir + sys.stdout.write("statistics.py ... %s ..." 
% tmp_save) + _statistics(pkl_list=pkl_list, name_list=name_list, save=tmp_save, + log=log, cut=args.cut) + # Error Trace with Std if save_dir is not "": tmp_save = os.path.join(save_dir, "TraceWithStd_perEval_%s.%s" % (time_str, args.file)) diff --git a/HPOlib/Plotting/generateTexTable.py b/HPOlib/Plotting/generateTexTable.py index 76cff0ec..c1f0d119 100644 --- a/HPOlib/Plotting/generateTexTable.py +++ b/HPOlib/Plotting/generateTexTable.py @@ -31,6 +31,7 @@ jinja2 = "" from HPOlib.Plotting import plot_util +from HPOlib import wrapping_util __authors__ = ["Katharina Eggensperger", "Matthias Feurer"] __contact__ = "automl.org" @@ -42,7 +43,7 @@ \\usepackage[landscape]{geometry} \\usepackage{multirow} % import command \multicolmun \\usepackage{tabularx} % Convenient table formatting -\\usepackage{booktabs} % provides \toprule, \midrule and \bottomrule +\\usepackage{booktabs} % provides \\toprule, \midrule and \\bottomrule \\begin{document} @@ -63,43 +64,57 @@ {{ experiment }} & {{ evals }} {%- for name in result_values -%} {%- set results = result_values[name] -%} -{{ ' & ' }}{{ results['mean']|round(3, 'floor') }}$\\pm${{ results['std']|round(3, 'floor')}} & {{ results['min']|round(3, 'floor') }} -{%- endfor %} \\\\ +{{ ' & ' }}{% if results['mean_best'] == True %}\\textbf{ {%- endif %}{{results['mean']|round(3, 'floor') }}{% if results['mean_best'] == True %}}{% endif %}$\\pm${{ results['std']|round(3, 'floor')}} & {{results['min']|round(3, 'floor') }}{%- endfor %} \\\\ \\bottomrule \\end{tabularx} \\end{table} \\end{document} """ -def main(pkl_list, name_list, save, cut=sys.maxint): +def main(pkl_list, name_list, save="", cut=sys.maxint, + template_string=template_string, experiment_name="Name", + num_evals="\\#eval"): + pickles = plot_util.load_pickles(name_list, pkl_list) + best_dict, idx_dict, keys = plot_util.get_best_dict(name_list, pickles, cut) + return generate_tex_template(best_dict, name_list, + template_string=template_string, save=save, + num_evals=num_evals, experiment_name=experiment_name) + + +def generate_tex_template(best_dict, name_list, save="", + template_string=template_string, experiment_name="Name", + num_evals="\\#eval"): tex = StringIO() result_values = OrderedDict([(name[0], dict()) for name in name_list]) - best_dict, idx_dict, keys = plot_util.read_pickles(name_list, pkl_list, cut) + means = [np.mean(best_dict[name]) for name in result_values] + stds = [np.std(best_dict[name]) for name in result_values] + mins = [np.min(best_dict[name]) for name in result_values] + maxs = [np.max(best_dict[name]) for name in result_values] for name in result_values: values = result_values[name] values["mean"] = np.mean(best_dict[name]) - values["mean_bold"] = False - values["mean_italic"] = False + values["mean_best"] = True if \ + wrapping_util.float_eq(values["mean"], min(means)) else False values["std"] = np.std(best_dict[name]) - values["std_bold"] = False - values["std_italic"] = False + values["std_best"] = True if \ + wrapping_util.float_eq(values["std"], min(stds)) else False values["min"] = np.min(best_dict[name]) - values["min_bold"] = False - values["min_italic"] = False + values["min_best"] = True if\ + wrapping_util.float_eq(values["min"], min(mins)) else False - values["max"] = np.min(best_dict[name]) - values["max_bold"] = False - values["max_italic"] = False + values["max"] = np.max(best_dict[name]) + values["max_best"] = True if\ + wrapping_util.float_eq(values["max"], min(maxs)) else False if jinja2: template = Template(template_string) 
tex.write(template.render(result_values=result_values, - experiment="Name", evals="\\#evals")) + experiment=experiment_name, evals=num_evals)) else: tex.write("Name & #evals") for name in result_values: @@ -119,7 +134,7 @@ def main(pkl_list, name_list, save, cut=sys.maxint): with open(save, "w") as fh: fh.write(table) else: - print table + return table if __name__ == "__main__": diff --git a/HPOlib/Plotting/plotTrace_perExp.py b/HPOlib/Plotting/plotTrace_perExp.py new file mode 100644 index 00000000..85ada58c --- /dev/null +++ b/HPOlib/Plotting/plotTrace_perExp.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python + +## +# wrapping: A program making it easy to use hyperparameter +# optimization software. +# Copyright (C) 2013 Katharina Eggensperger and Matthias Feurer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from argparse import ArgumentParser +import cPickle +import itertools +import sys + +from matplotlib.pyplot import tight_layout, figure, subplots_adjust, subplot, savefig, show +import matplotlib.gridspec +import numpy as np + +from HPOlib.Plotting import plot_util + +__authors__ = ["Katharina Eggensperger", "Matthias Feurer"] +__contact__ = "automl.org" + + +def plot_optimization_trace_cv(trial_list, name_list, optimum=0, title="", + log=True, save="", y_max=0, y_min=0): + markers =plot_util.get_plot_markers() + colors = plot_util.get_plot_colors() + linestyles = itertools.cycle(['-']) + size = 1 + + ratio = 5 + gs = matplotlib.gridspec.GridSpec(ratio, 1) + fig = figure(1, dpi=100) + fig.suptitle(title, fontsize=16) + ax1 = subplot(gs[0:ratio, :]) + ax1.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5) + min_val = sys.maxint + max_val = -sys.maxint + max_trials = 0 + + fig.suptitle(title, fontsize=16) + + # Plot the average error and std + for i in range(len(name_list)): + m = markers.next() + c = colors.next() + l = linestyles.next() + leg = False + for tr in trial_list[i]: + if log: + tr = np.log10(tr) + x = range(1, len(tr)+1) + y = tr + if not leg: + ax1.plot(x, y, color=c, linewidth=size, linestyle=l, label=name_list[i][0]) + leg = True + ax1.plot(x, y, color=c, linewidth=size, linestyle=l) + min_val = min(min_val, min(tr)) + max_val = max(max_val, max(tr)) + max_trials = max(max_trials, len(tr)) + + # Maybe plot on logscale + ylabel = "" + + if log: + ax1.set_ylabel("log10(Minfunction value)" + ylabel) + else: + ax1.set_ylabel("Minfunction value" + ylabel) + + # Descript and label the stuff + leg = ax1.legend(loc='best', fancybox=True) + leg.get_frame().set_alpha(0.5) + ax1.set_xlabel("#Function evaluations") + + if y_max == y_min: + # Set axes limits + ax1.set_ylim([min_val-0.1*abs((max_val-min_val)), max_val+0.1*abs((max_val-min_val))]) + else: + ax1.set_ylim([y_min, y_max]) + ax1.set_xlim([0, max_trials + 1]) + + tight_layout() + subplots_adjust(top=0.85) + if save != "": + savefig(save, dpi=100, facecolor='w', edgecolor='w', + orientation='portrait', papertype=None, format=None, + 
transparent=False, bbox_inches="tight", pad_inches=0.1) + else: + show() + + +def main(pkl_list, name_list, autofill, optimum=0, save="", title="", log=False, + y_min=0, y_max=0): + + trial_list = list() + for i in range(len(pkl_list)): + tmp_trial_list = list() + max_len = -sys.maxint + for pkl in pkl_list[i]: + fh = open(pkl, "r") + trials = cPickle.load(fh) + fh.close() + + trace = plot_util.get_Trace_cv(trials) + tmp_trial_list.append(trace) + max_len = max(max_len, len(trace)) + trial_list.append(list()) + for tr in tmp_trial_list: + # if len(tr) < max_len: + # tr.extend([tr[-1] for idx in range(abs(max_len - len(tr)))]) + trial_list[-1].append(np.array(tr)) + + plot_optimization_trace_cv(trial_list, name_list, optimum, title=title, log=log, + save=save, y_min=y_min, y_max=y_max) + + if save != "": + sys.stdout.write("Saved plot to " + save + "\n") + else: + sys.stdout.write("..Done\n") + +if __name__ == "__main__": + prog = "python plotTraceWithStd.py WhatIsThis [WhatIsThis ]" + description = "Plot a Trace with std for multiple experiments" + + parser = ArgumentParser(description=description, prog=prog) + + # Options for specific benchmarks + parser.add_argument("-o", "--optimum", type=float, dest="optimum", + default=0, help="If not set, the optimum is supposed to be zero") + + # Options which are available only for this plot + parser.add_argument("-a", "--autofill", action="store_true", dest="autofill", + default=False, help="Fill trace automatically") + + # General Options + parser.add_argument("-l", "--log", action="store_true", dest="log", + default=False, help="Plot on log scale") + parser.add_argument("--max", dest="max", type=float, + default=0, help="Maximum of the plot") + parser.add_argument("--min", dest="min", type=float, + default=0, help="Minimum of the plot") + parser.add_argument("-s", "--save", dest="save", + default="", help="Where to save plot instead of showing it?") + parser.add_argument("-t", "--title", dest="title", + default="", help="Optional supertitle for plot") + + args, unknown = parser.parse_known_args() + + sys.stdout.write("\nFound " + str(len(unknown)) + " arguments\n") + + pkl_list_main, name_list_main = plot_util.get_pkl_and_name_list(unknown) + + main(pkl_list=pkl_list_main, name_list=name_list_main, autofill=args.autofill, optimum=args.optimum, + save=args.save, title=args.title, log=args.log, y_min=args.min, y_max=args.max) diff --git a/HPOlib/Plotting/plot_util.py b/HPOlib/Plotting/plot_util.py index 4f148bd4..a20abe45 100644 --- a/HPOlib/Plotting/plot_util.py +++ b/HPOlib/Plotting/plot_util.py @@ -22,10 +22,16 @@ import numpy as np import sys +import HPOlib.wrapping_util + __authors__ = ["Katharina Eggensperger", "Matthias Feurer"] __contact__ = "automl.org" +# A super-simple cache for unpickled objects... +cache = dict() + + def get_plot_markers(): return itertools.cycle(['o', 's', 'x', '^']) @@ -43,7 +49,39 @@ def get_plot_colors(): "#999999"]) # Grey -def read_pickles(name_list, pkl_list, cut=sys.maxint): +def load_pickles(name_list, pkl_list): + pickles = dict() + for i in range(len(name_list)): + key = name_list[i][0] + pickles[key] = list() + + for pkl in pkl_list[i]: + if cache.get(pkl) is None: + fh = open(pkl) + pickles[key].append(cPickle.load(fh)) + fh.close() + cache[pkl] = pickles[key][-1] + else: + pickles[key].append(cache.get(pkl)) + return pickles + + +def get_best_dict(name_list, pickles, cut=sys.maxint): + """ + Get the best values of many experiments. 
+ + Input + * name_list: A list with of tuples of kind (optimizer_name, num_pickles) + * pickles: A dictionary with all pickle files for an optimizer_name + * cut: How many iterations should be considered + + Returns: + * best_dict: A dictionary with the best response value for every optimizer + * idx_dict: A dictionary with the number of iterations needed to find the + optimum + * keys: A list with optimizer names. + + """ best_dict = dict() idx_dict = dict() keys = list() @@ -51,11 +89,8 @@ def read_pickles(name_list, pkl_list, cut=sys.maxint): keys.append(name_list[i][0]) best_dict[name_list[i][0]] = list() idx_dict[name_list[i][0]] = list() - for pkl in pkl_list[i]: - fh = open(pkl) - trial = cPickle.load(fh) - fh.close() - best, idx = get_best_value_and_index(trial, cut) + for pkl in pickles[name_list[i][0]]: + best, idx = get_best_value_and_index(pkl, cut) best_dict[name_list[i][0]].append(best) idx_dict[name_list[i][0]].append(idx) return best_dict, idx_dict, keys @@ -69,7 +104,7 @@ def get_pkl_and_name_list(argument_list): if not ".pkl" in argument_list[i] and now_data: raise ValueError("You need at least on .pkl file per Experiment, %s has none" % name_list[-1]) elif not ".pkl" in argument_list[i] and not now_data: - print "Adding", argument_list[i] + # print "Adding", argument_list[i] name_list.append([argument_list[i], 0]) pkl_list.append(list()) now_data = True @@ -133,4 +168,23 @@ def get_best_value_and_index(trials, cut=False): else: best_value = traj[-1] best_index = np.argmin(traj) - return best_value, best_index \ No newline at end of file + return best_value, best_index + + +def get_Trace_cv(trials): + trace = list() + trials_list = trials['trials'] + instance_order = trials['instance_order'] + instance_mean = np.ones([len(trials_list), 1]) * np.inf + instance_val = np.ones([len(trials_list), len(trials_list[0]['instance_results'])]) * np.nan + for tr_idx, in_idx in instance_order: + instance_val[tr_idx, in_idx] = trials_list[tr_idx]['instance_results'][in_idx] + + val = HPOlib.wrapping_util.nan_mean(instance_val[tr_idx, :]) + if np.isnan(val): + val = np.inf + instance_mean[tr_idx] = val + trace.append(np.min(instance_mean, axis=0)[0]) + if np.isnan(trace[-1]): + del trace[-1] + return trace \ No newline at end of file diff --git a/HPOlib/Plotting/results.sh b/HPOlib/Plotting/results.sh index fea13d2a..c793a766 100644 --- a/HPOlib/Plotting/results.sh +++ b/HPOlib/Plotting/results.sh @@ -86,9 +86,9 @@ then directory=`ls | grep "^hyperopt_august2013_mod_${i}_"` if [ -a "${directory}/hyperopt_august2013_mod.out" ] then - num=`cat ${directory}/hyperopt_august2013_mod.out | grep " -----------------------RUNNING RUNSOLVER" | wc -l` + num=`cat ${directory}/hyperopt_august2013_mod.out | grep "Result:" | wc -l` - per=`cat ${directory}/hyperopt_august2013_mod.out | grep "Result for ParamILS:" | sort -r | tail -1` + per=`cat ${directory}/hyperopt_august2013_mod.out | grep "Result:" | cut -d" " -f 4 | sort -r -n | tail -1` per=`echo $per | cut -d' ' -f9` per=`echo $per | sed 's/,//'` @@ -114,9 +114,9 @@ then directory=`ls | grep "^random_hyperopt_august2013_mod_${i}_"` if [ -a "${directory}/random_hyperopt_august2013_mod.out" ] then - num=`cat ${directory}/random_hyperopt_august2013_mod.out | grep " -----------------------RUNNING RUNSOLVER" | wc -l` + num=`cat ${directory}/random_hyperopt_august2013_mod.out | grep "Result:" | wc -l` - per=`cat ${directory}/random_hyperopt_august2013_mod.out | grep "Result for ParamILS:" | sort -r | tail -1` + per=`cat 
${directory}/random_hyperopt_august2013_mod.out | grep "Result:" | cut -d" " -f 4 | sort -r -n | tail -1` per=`echo $per | cut -d' ' -f9` per=`echo $per | sed 's/,//'` diff --git a/HPOlib/Plotting/statistics.py b/HPOlib/Plotting/statistics.py index 62b14100..8b6e999a 100644 --- a/HPOlib/Plotting/statistics.py +++ b/HPOlib/Plotting/statistics.py @@ -113,7 +113,8 @@ def _mann_whitney_u(x, y=None): def main(pkl_list, name_list, cut=sys.maxint): - best_dict, idx_dict, keys = plot_util.read_pickles(name_list, pkl_list, + pickles = plot_util.load_pickles(name_list, pkl_list) + best_dict, idx_dict, keys = plot_util.get_best_dict(name_list, pickles, cut=cut) for k in keys: diff --git a/HPOlib/__init__.py b/HPOlib/__init__.py index 64ca9bc7..0b7f71ee 100644 --- a/HPOlib/__init__.py +++ b/HPOlib/__init__.py @@ -1,4 +1,4 @@ __authors__ = ["Katharina Eggensperger", "Matthias Feurer"] __contact__ = "automl.org" -__version__ = "0.1.0rc1" +__version__ = "0.1.0" diff --git a/HPOlib/benchmark_functions.py b/HPOlib/benchmark_functions.py index 5a2eb531..af81d352 100644 --- a/HPOlib/benchmark_functions.py +++ b/HPOlib/benchmark_functions.py @@ -461,7 +461,7 @@ def save_svm_on_grid(params, ret_time=False, **kwargs): def svm_on_grid(c, alpha, epsilon, ret_time=False): - # Values for an 6*8*8 grid search which was performed by Hofman et. al. + # Values for an 24*14*4 grid search which was performed by Miller et. al. # Values obtained from Jasper Snoek # C, alpha, epsilon # opt_time: return time instead of performance diff --git a/HPOlib/config_parser/generalDefault.cfg b/HPOlib/config_parser/generalDefault.cfg index 511fe2e9..9e5d80cd 100644 --- a/HPOlib/config_parser/generalDefault.cfg +++ b/HPOlib/config_parser/generalDefault.cfg @@ -1,4 +1,5 @@ [HPOLIB] +console_output_delay = 1.0 #Will be used for wrapping.py and SMAC run_instance = runsolver_wrapper.py use_own_time_measurement = True @@ -10,6 +11,8 @@ number_of_concurrent_jobs = 1 function_setup = function_teardown = +experiment_directory_prefix = + #The mem-limit must be expressed in mega-bytes #The cpu-limit must be expressed in seconds (CPU time) #The time-limit must be expressed in seconds (wall clock time) diff --git a/HPOlib/cv.py b/HPOlib/cv.py index 688e79b1..99964e56 100644 --- a/HPOlib/cv.py +++ b/HPOlib/cv.py @@ -195,11 +195,13 @@ def main(*args, **kwargs): for arg in args: if type(arg) == dict: params = arg + break if params is None: logger.critical("No parameter dict found in cv.py.\n" "args: %s\n kwargs: %s", args, kwargs) - sys.exit(1) + # TODO: Hack for TPE and AUTOWeka + params = args # Load the experiment to do time-keeping cv_starttime = time.time() diff --git a/HPOlib/runsolver_wrapper.py b/HPOlib/runsolver_wrapper.py index 169ab626..09e8ff78 100644 --- a/HPOlib/runsolver_wrapper.py +++ b/HPOlib/runsolver_wrapper.py @@ -52,22 +52,22 @@ def remove_param_metadata(params): """ for para in params: new_name = para + + if isinstance(params[para], str): + params[para] = params[para].strip("'") if "LOG10_" in para: pos = para.find("LOG10") new_name = para[0:pos] + para[pos + 6:] - # new_name = new_name.strip("_") params[new_name] = np.power(10, float(params[para])) del params[para] elif "LOG2" in para: pos = para.find("LOG2_") new_name = para[0:pos] + para[pos + 5:] - # new_name = new_name.strip("_") params[new_name] = np.power(2, float(params[para])) del params[para] elif "LOG_" in para: pos = para.find("LOG") new_name = para[0:pos] + para[pos + 4:] - # new_name = new_name.strip("_") params[new_name] = np.exp(float(params[para])) del 
params[para] #Check for Q value, returns round(x/q)*q @@ -75,7 +75,6 @@ def remove_param_metadata(params): if m is not None: pos = new_name.find(m.group(0)) tmp = new_name[0:pos] + new_name[pos + len(m.group(0)):] - #tmp = tmp.strip("_") q = float(m.group(0)[1:-1]) params[tmp] = round(float(params[new_name]) / q) * q del params[new_name] @@ -276,7 +275,7 @@ def parse_output_files(cfg, run_instance_output, runsolver_output_file): error + " Please have a look at " + runsolver_output_file) # It is useful to have the run_instance_output for debugging - os.remove(run_instance_output) + # os.remove(run_instance_output) return rval @@ -331,6 +330,8 @@ def main(): experiment.set_one_fold_running(trial_index, fold) del experiment # release Experiment lock logger.debug("Calling: %s" % cmd) + #sys.stdout.write(cmd + "\n") + #sys.stdout.flush() process = subprocess.Popen(cmd, stdout=fh, stderr=fh, shell=True, executable="/bin/bash") diff --git a/HPOlib/wrapping.py b/HPOlib/wrapping.py index c4bbc886..a173cee6 100644 --- a/HPOlib/wrapping.py +++ b/HPOlib/wrapping.py @@ -19,6 +19,7 @@ from argparse import ArgumentParser import imp import logging +import psutil import os from Queue import Queue, Empty import signal @@ -26,12 +27,13 @@ import subprocess import sys from threading import Thread +import thread import time import HPOlib import HPOlib.check_before_start as check_before_start import HPOlib.wrapping_util as wrapping_util -# Experiment is imported after we check for numpy +# Import experiment only after the check for numpy succeeded __authors__ = ["Katharina Eggensperger", "Matthias Feurer"] __contact__ = "automl.org" @@ -44,6 +46,59 @@ logger = logging.getLogger("HPOlib.wrapping") +def get_all_p_for_pgid(): + current_pgid = os.getpgid(os.getpid()) + pids = psutil.pids() + running_pid = [] + for pid in pids: + try: + pgid = os.getpgid(pid) + except: + continue + + # Don't try to kill HPOlib-run + if pgid == current_pgid and pid != os.getpid(): + # This solves the problem that a Zombie process counts + # towards the number of process which have to be killed + running_pid.append(pid) + return running_pid + + +def kill_children(sig): + # TODO: somehow wait, until the Experiment pickle is written to disk + running_pid = get_all_p_for_pgid() + + logger.critical("Running %s" % str(running_pid)) + for pid in running_pid: + try: + os.kill(pid, sig) + except Exception as e: + logger.error(type(e)) + logger.error(e) + + +class Exit: + def __init__(self): + self.exit_flag = False + self.signal = None + + def true(self): + self.exit_flag = True + + def false(self): + self.exit_flag = False + + def set_exit_flag(self, exit): + self.exit_flag = exit + + def get_exit(self): + return self.exit_flag + + def signal_callback(self, signal, frame): + self.true() + self.signal = signal + + def calculate_wrapping_overhead(trials): wrapping_time = 0 for times in zip(trials.cv_starttime, trials.cv_endtime): @@ -83,6 +138,56 @@ def calculate_optimizer_time(trials): return np.nansum(optimizer_time) +def output_experiment_pickle(console_output_delay, + printed_start_configuration, + printed_end_configuration, + optimizer_dir_in_experiment, + optimizer, experiment_directory_prefix, lock, + Experiment, np, exit): + current_best = -1 + while True: + try: + trials = Experiment.Experiment(optimizer_dir_in_experiment, + experiment_directory_prefix + optimizer) + except Exception as e: + logger.error(e) + time.sleep(console_output_delay) + continue + + with lock: + for i in range(len(printed_end_configuration), 
len(trials.instance_order)): + configuration = trials.instance_order[i][0] + fold = trials.instance_order[i][1] + if i + 1 > len(printed_start_configuration): + logger.info("Starting configuration %5d, fold %2d", + configuration, fold) + printed_start_configuration.append(i) + + if np.isfinite(trials.trials[configuration] + ["instance_results"][fold]): + last_result = trials.trials[configuration] \ + ["instance_results"][fold] + tmp_current_best = trials.get_arg_best() + if tmp_current_best <= i: + current_best = tmp_current_best + # Calculate current best + # Check if last result is finite, if not calc nanmean over all instances + dct_helper = trials.trials[current_best] + res = dct_helper["result"] if \ + np.isfinite(dct_helper["result"]) \ + else wrapping_util.nan_mean(dct_helper["instance_results"]) + #np.nanmean(trials.trials[current_best]["instance_results"]) + # nanmean does not work for all numpy version + logger.info("Result %10f, current best %10f", + last_result, res) + printed_end_configuration.append(i) + + del trials + time.sleep(console_output_delay) + if exit: + break + + def use_arg_parser(): """Parse all options which can be handled by the wrapping script. Unknown arguments are ignored and returned as a list. It is useful to @@ -178,11 +283,14 @@ def main(): import traceback logger.critical(traceback.format_exc()) sys.exit(1) + experiment_directory_prefix = config.get("HPOLIB", "experiment_directory_prefix") optimizer_call, optimizer_dir_in_experiment = optimizer_module.main(config=config, - options=args, - experiment_dir=experiment_dir) + options=args, + experiment_dir=experiment_dir, + experiment_directory_prefix=experiment_directory_prefix) cmd = optimizer_call + config.set("HPOLIB", "seed", str(args.seed)) with open(os.path.join(optimizer_dir_in_experiment, "config.cfg"), "w") as f: config.set("HPOLIB", "is_not_original_config_file", "True") wrapping_util.save_config_to_file(f, config, write_nones=True) @@ -194,7 +302,9 @@ def main(): except OSError: pass folds = config.getint('HPOLIB', 'number_cv_folds') - trials = Experiment.Experiment(optimizer_dir_in_experiment, optimizer, folds=folds, + trials = Experiment.Experiment(optimizer_dir_in_experiment, + experiment_directory_prefix + optimizer, + folds=folds, max_wallclock_time=config.get('HPOLIB', 'cpu_limit'), title=args.title) @@ -237,18 +347,51 @@ def main(): fn_setup = config.get("HPOLIB", "function_setup") if fn_setup: try: - output = subprocess.check_output(fn_setup, stderr=subprocess.STDOUT) + logger.info(fn_setup) + fn_setup = shlex.split(fn_setup) + output = subprocess.check_output(fn_setup, stderr=subprocess.STDOUT) #, + #shell=True, executable="/bin/bash") + logger.debug(output) except subprocess.CalledProcessError as e: logger.critical(e.output) sys.exit(1) + except OSError as e: + logger.critical(e.message) + logger.critical(e.filename) + sys.exit(1) logger.info(cmd) output_file = os.path.join(optimizer_dir_in_experiment, optimizer + ".out") fh = open(output_file, "a") cmd = shlex.split(cmd) print cmd + + # Use a flag which is set to true as soon as all children are + # supposed to be killed + exit_ = Exit() + signal.signal(signal.SIGTERM, exit_.signal_callback) + signal.signal(signal.SIGABRT, exit_.signal_callback) + signal.signal(signal.SIGINT, exit_.signal_callback) + signal.signal(signal.SIGHUP, exit_.signal_callback) + + # Change into the current experiment directory + # Some optimizer might expect this + dir_before_exp = os.getcwd() + os.chdir(optimizer_dir_in_experiment) + # See man 7 credentials 
for the meaning of a process group id + # This makes wrapping.py useable with SGEs default behaviour, + # where qdel sends a SIGKILL to a whole process group + logger.info(os.getpid()) + os.setpgid(os.getpid(), os.getpid()) + # TODO: figure out why shell=True was removed in commit f47ac4bb3ffe7f70b795d50c0828ca7e109d2879 + # maybe it has something todo with the previous behaviour where a + # session id was set... proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, preexec_fn=os.setsid) + stderr=subprocess.PIPE) + + global child_process_pid + child_process_pid = proc.pid + logger.info("-----------------------RUNNING----------------------------------") # http://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python # How often is the experiment pickle supposed to be opened? @@ -257,15 +400,17 @@ def main(): else: optimizer_end_time = sys.float_info.max - last_output = time.time() + console_output_delay = config.getfloat("HPOLIB", "console_output_delay") + printed_start_configuration = list() printed_end_configuration = list() - current_best = -1 + sent_SIGINT = False + sent_SIGINT_time = np.inf sent_SIGTERM = False + sent_SIGTERM_time = np.inf sent_SIGKILL = False - # After the evaluation finished, we scan the experiment pickle twice - # to print everything! - minimal_runs_to_go = 2 + sent_SIGKILL_time = np.inf + def enqueue_output(out, queue): for line in iter(out.readline, b''): @@ -280,9 +425,26 @@ def enqueue_output(out, queue): stdout_thread.daemon = True stderr_thread.start() stdout_thread.start() - logger.info('Optimizer runs with PID: %d', proc.pid) + if not (args.verbose or args.silent): + lock = thread.allocate_lock() + thread.start_new_thread(output_experiment_pickle, + (console_output_delay, + printed_start_configuration, + printed_end_configuration, + optimizer_dir_in_experiment, + optimizer, experiment_directory_prefix, + lock, Experiment, np, False)) + logger.info('Optimizer runs with PID: %d', proc.pid) + + while True: + # this implements the total runtime limit + if time.time() > optimizer_end_time and not sent_SIGINT: + logger.info("Reached total_time_limit, going to shutdown.") + exit_.true() + + # necessary, otherwise HPOlib-run takes 100% of one processor + time.sleep(0.2) - while minimal_runs_to_go > 0: # Think of this as a do-while loop... try: while True: line = stdout_queue.get_nowait() @@ -307,74 +469,68 @@ def enqueue_output(out, queue): except Empty: pass - if time.time() > optimizer_end_time and not sent_SIGTERM: - os.killpg(proc.pid, signal.SIGTERM) + ret = proc.poll() + + running = get_all_p_for_pgid() + if ret is not None and len(running) == 0: + break + # TODO: what happens if we have a ret but something is still + # running? 
+ + if exit_.get_exit() == True and not sent_SIGINT: + logger.info("Sending SIGINT") + kill_children(signal.SIGINT) + sent_SIGINT_time = time.time() + sent_SIGINT = True + + if exit_.get_exit() == True and not sent_SIGTERM and time.time() \ + > sent_SIGINT_time + 100: + logger.info("Sending SIGTERM") + kill_children(signal.SIGTERM) + sent_SIGTERM_time = time.time() sent_SIGTERM = True - if time.time() > optimizer_end_time + 200 and not sent_SIGKILL: - os.killpg(proc.pid, signal.SIGKILL) + if exit_.get_exit() == True and not sent_SIGKILL and time.time() \ + > sent_SIGTERM_time + 100: + logger.info("Sending SIGKILL") + kill_children(signal.SIGKILL) + sent_SIGKILL_time = time.time() sent_SIGKILL = True - fh.flush() - # necessary, otherwise HPOlib-run takes 100% of one processor - time.sleep(0.1) - - if not (args.verbose or args.silent) and time.time() - last_output > 1: - trials = Experiment.Experiment(optimizer_dir_in_experiment, - optimizer) - - for i in range(len(printed_end_configuration), len(trials.instance_order)): - configuration = trials.instance_order[i][0] - fold = trials.instance_order[i][1] - if i + 1 > len(printed_start_configuration): - logger.info("Starting configuration %5d, fold %2d", - configuration, fold) - printed_start_configuration.append(i) - - if np.isfinite(trials.trials[configuration] - ["instance_results"][fold]): - last_result = trials.trials[configuration] \ - ["instance_results"][fold] - tmp_current_best = trials.get_arg_best() - if tmp_current_best <= i: - current_best = tmp_current_best - # Calculate current best - # Check if last result is finite, if not calc nanmean over all instances - dct_helper = trials.trials[current_best] - res = dct_helper["result"] if \ - np.isfinite(dct_helper["result"]) \ - else wrapping_util.nan_mean(dct_helper["instance_results"]) - #np.nanmean(trials.trials[current_best]["instance_results"]) - # nanmean does not work for all numpy version - logger.info("Result %10f, current best %10f", - last_result, res) - printed_end_configuration.append(i) - - del trials - last_output = time.time() - - if proc.poll() is not None: # the end condition for the - minimal_runs_to_go -= 1 # do-while loop - - elif args.verbose or args.silent: - if proc.poll() is not None: - minimal_runs_to_go -= 1 - ret = proc.returncode + del proc + + if not (args.verbose or args.silent): + output_experiment_pickle(console_output_delay, + printed_start_configuration, + printed_end_configuration, + optimizer_dir_in_experiment, + optimizer, experiment_directory_prefix, + lock, Experiment, np, True) logger.info("-----------------------END--------------------------------------") fh.close() + # Change back into to directory + os.chdir(dir_before_exp) + # call target_function.teardown() fn_teardown = config.get("HPOLIB", "function_teardown") if fn_teardown: try: - output = subprocess.check_output(fn_teardown, stderr=subprocess.STDOUT) + fn_teardown = shlex.split(fn_teardown) + output = subprocess.check_output(fn_teardown, stderr=subprocess.STDOUT) #, + #shell=True, executable="/bin/bash") except subprocess.CalledProcessError as e: logger.critical(e.output) sys.exit(1) + except OSError as e: + logger.critical(e.message) + logger.critical(e.filename) + sys.exit(1) - trials = Experiment.Experiment(optimizer_dir_in_experiment, optimizer) + trials = Experiment.Experiment(optimizer_dir_in_experiment, + experiment_directory_prefix + optimizer) trials.endtime.append(time.time()) #noinspection PyProtectedMember trials._save_jobs() diff --git a/HPOlib/wrapping_util.py 
b/HPOlib/wrapping_util.py index 30536998..8c5f1edd 100644 --- a/HPOlib/wrapping_util.py +++ b/HPOlib/wrapping_util.py @@ -27,6 +27,7 @@ import os from StringIO import StringIO import sys +import types import config_parser.parse as parse @@ -108,7 +109,8 @@ def load_experiment_config_file(): if not config.has_option("HPOLIB", "is_not_original_config_file"): logger.critical("Config file in directory %s seems to be an" " original config which was not created by wrapping.py. " - "Please contact the HPOlib maintainer to solve this issue.") + "Are you sure that you are in the right directory?" % + os.getcwd()) sys.exit(1) return config except IOError as e: @@ -207,7 +209,8 @@ def parse_config_values_from_unknown_arguments(unknown_arguments, config): config: A ConfigParser.SafeConfigParser object which contains all keys should be parsed from the unknown_arguments list. Returns: - an argparse.Namespace object containing the parsed values. + an argparse.Namespace object containing the parsed values. These are + packed inside a python list or None if not present. Raises: an error if an argument from unknown_arguments is not a key in config """ @@ -219,7 +222,7 @@ def parse_config_values_from_unknown_arguments(unknown_arguments, config): parser = ArgumentParser() for argument in further_possible_command_line_arguments: - parser.add_argument(argument) + parser.add_argument(argument, nargs="+") return parser.parse_args(unknown_arguments) @@ -230,7 +233,10 @@ def config_with_cli_arguments(config, config_overrides): for key in config.options(section): cli_key = "%s:%s" % (section, key) if cli_key in arg_dict: - config.set(section, key, arg_dict[cli_key]) + value = arg_dict[cli_key] + if value is not None and not isinstance(value, types.StringTypes): + value = " ".join(value) + config.set(section, key, value) else: config.remove_option(section, key) return config diff --git a/INSTALL.md b/INSTALL.md index 212f6492..e385c84c 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -80,7 +80,7 @@ ls optimizers/smac ```bash cd benchmarks/branin -HPOlib-run -o ../../optimizers/smac/sma -s 23 +HPOlib-run -o ../../optimizers/smac/smac -s 23 ``` This takes depending on your machine ~2 minutes. 
You can now plot the results of your first experiment: @@ -92,17 +92,20 @@ HPOlib-plot FIRSTRUN smac_2_06_01-dev_23_*/smac_*.pkl -s `pwd`/Plots/ You can test the other optimizers (spearmint will take quite longer 30min): ```bash - HPOlib-run -o ../../optimizers/tpe/h -s 23 - HPOlib-run -o ../../optimizers/spearmint/s -s 23 +HPOlib-run -o ../../optimizers/tpe/h -s 23 +HPOlib-run -o ../../optimizers/spearmint/spearmint_april2013 -s 23 ``` and again: ```bash - HPOlib-plot SMAC smac_2_06_01-dev_23_*/smac_*.pkl TPE hyperopt_august2013_mod_23_*/hyp*.pkl SPEARMINT spearmint_april2013_mod_23_*/spear*.pkl -s `pwd`/Plots/ +HPOlib-plot SMAC smac_2_06_01-dev_23_*/smac_*.pkl TPE hyperopt_august2013_mod_23_*/hyp*.pkl SPEARMINT spearmint_april2013_mod_23_*/spear*.pkl -s `pwd`/Plots/ +``` + and to check the general performance on this super complex benchmark: - HPOlib-plot BRANIN smac_2_06_01-dev_23_*/smac_*.pkl hyperopt_august2013_mod_23_*/hyp*.pkl spearmint_april2013_mod_23_*/spear*.pkl -s `pwd`/Plots/ +``` +HPOlib-plot BRANIN smac_2_06_01-dev_23_*/smac_*.pkl hyperopt_august2013_mod_23_*/hyp*.pkl spearmint_april2013_mod_23_*/spear*.pkl -s `pwd`/Plots/ ``` Using without installation @@ -112,21 +115,21 @@ Using without installation If you decide to not install HPOlib, you need to download the optimizer code by yourself ```bash - cd optimizers - wget http://www.automl.org/hyperopt_august2013_mod_src.tar.gz - wget http://www.automl.org/smac_2_06_01-dev_src.tar.gz - wget http://www.automl.org/spearmint_april2013_mod_src.tar.gz +cd optimizers +wget http://www.automl.org/hyperopt_august2013_mod_src.tar.gz +wget http://www.automl.org/smac_2_06_01-dev_src.tar.gz +wget http://www.automl.org/spearmint_april2013_mod_src.tar.gz - tar -xf hyperopt_august2013_mod_src.tar.gz - mv hyperopt_august2013_mod_src tpe/ +tar -xf hyperopt_august2013_mod_src.tar.gz +mv hyperopt_august2013_mod_src tpe/ - tar -xf smac_2_06_01-dev_src.tar.gz - mv smac_2_06_01-dev_src.tar.gz smac/ +tar -xf smac_2_06_01-dev_src.tar.gz +mv smac_2_06_01-dev_src.tar.gz smac/ - tar -xf spearmint_april2013_mod_src.tar.gz - mv spearmint_april2013_mod_src spearmint/ +tar -xf spearmint_april2013_mod_src.tar.gz +mv spearmint_april2013_mod_src spearmint/ - cd ../ +cd ../ ``` And you need to install all requirements: @@ -147,25 +150,25 @@ e.g. with Also you need the runsolver ```bash - wget http://www.cril.univ-artois.fr/~roussel/runsolver/runsolver-3.3.2.tar.bz2 - tar -xf runsolver-3.3.2.tar.bz2 - cd runsolver/src +wget http://www.cril.univ-artois.fr/~roussel/runsolver/runsolver-3.3.2.tar.bz2 +tar -xf runsolver-3.3.2.tar.bz2 +cd runsolver/src make ``` as this might not work, you can change the makefile via ```bash - sed -i 's/\/usr\/include\/asm\/unistd/\/usr\/include\/unistd/g' ./Makefile - make +sed -i 's/\/usr\/include\/asm\/unistd/\/usr\/include\/unistd/g' ./Makefile +make ``` then you need to add runsolver (and HPOlib) to your PATH (PYTHONPATH): ```bash - cd ../../ - export PATH=$PATH:/path/to/runsolver/src/ - export PYTHONPATH=$PYTHONPATH:`pwd` +cd ../../ +export PATH=$PATH:/path/to/runsolver/src/ +export PYTHONPATH=$PYTHONPATH:`pwd` ``` then you can run a benchmark like in step 5.) 
from installing with setup.py with replacing diff --git a/benchmarks/branin/config.cfg b/benchmarks/branin/config.cfg index 4a7dbc05..c14fa870 100644 --- a/benchmarks/branin/config.cfg +++ b/benchmarks/branin/config.cfg @@ -1,5 +1,5 @@ [SMAC] -p = smac_2_06_01-dev/params.pcs +p = params.pcs [TPE] space = space.py diff --git a/optimizers/smac/smac_2_06_01-dev.py b/optimizers/smac/smac_2_06_01-dev.py index 2b038d55..1df77c8c 100644 --- a/optimizers/smac/smac_2_06_01-dev.py +++ b/optimizers/smac/smac_2_06_01-dev.py @@ -92,7 +92,7 @@ def build_smac_call(config, options, optimizer_dir): '--algoExec', '"python', os.path.join(algo_exec_dir, config.get('SMAC', 'algo_exec')) + '"', '--execDir', optimizer_dir, - '-p', config.get('SMAC', 'p'), + '-p', os.path.join(optimizer_dir, os.path.basename(config.get('SMAC', 'p'))), # The experiment dir MUST not be specified when restarting, it is set # further down in the code # '--experimentDir', optimizer_dir, @@ -196,7 +196,7 @@ def restore(config, optimizer_dir, **kwargs): #noinspection PyUnusedLocal -def main(config, options, experiment_dir, **kwargs): +def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs): # config: Loaded .cfg file # options: Options containing seed, restore_dir, # experiment_dir: Experiment directory/Benchmark_directory @@ -211,7 +211,9 @@ def main(config, options, experiment_dir, **kwargs): raise Exception("The restore directory does not exist") optimizer_dir = options.restore else: - optimizer_dir = os.path.join(experiment_dir, optimizer_str + "_" + + optimizer_dir = os.path.join(experiment_dir, + experiment_directory_prefix + + optimizer_str + "_" + str(options.seed) + "_" + time_string) # Build call cmd = build_smac_call(config, options, optimizer_dir) @@ -221,7 +223,20 @@ def main(config, options, experiment_dir, **kwargs): os.mkdir(optimizer_dir) # TODO: This can cause huge problems when the files are located # somewhere else? - params = os.path.split(config.get('SMAC', "p"))[1] + space = config.get('SMAC', "p") + abs_space = os.path.abspath(space) + parent_space = os.path.join(experiment_dir, optimizer_str, space) + if os.path.exists(abs_space): + space = abs_space + elif os.path.exists(parent_space): + space = parent_space + else: + raise Exception("SMAC search space not found. 
Searched at %s and " + "%s" % (abs_space, parent_space)) + + if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))): + os.symlink(os.path.join(experiment_dir, optimizer_str, space), + os.path.join(optimizer_dir, os.path.basename(space))) # Copy the smac search space and create the instance information fh = open(os.path.join(optimizer_dir, 'train.txt'), "w") @@ -236,10 +251,7 @@ def main(config, options, experiment_dir, **kwargs): fh = open(os.path.join(optimizer_dir, "scenario.txt"), "w") fh.close() - - if not os.path.exists(os.path.join(optimizer_dir, params)): - os.symlink(os.path.join(experiment_dir, optimizer_str, params), - os.path.join(optimizer_dir, params)) + logger.info("### INFORMATION ################################################################") logger.info("# You're running %40s #" % config.get('SMAC', 'path_to_optimizer')) for v in version_info: diff --git a/optimizers/smac/smac_2_06_01-devDefault.cfg b/optimizers/smac/smac_2_06_01-devDefault.cfg index c0d70c79..605cc3f0 100644 --- a/optimizers/smac/smac_2_06_01-devDefault.cfg +++ b/optimizers/smac/smac_2_06_01-devDefault.cfg @@ -3,21 +3,9 @@ handles_cv=1 [SMAC] -# cutoff_time = runsolver_time_limit + 100 sec -# Set otherwise -# algo_exec = %(run_instance)s -# num_concurrent_algo_execs = %(number_of_concurrent_jobs)s -# experimentDir = %(exp_directory)s/ -# outputDirectory = %(exp_directory)s/smac/output/ -# total_num_runs_limit = %(number_of_jobs)s * %(cv)s - -num_run = 0 intra_instance_obj = MEAN run_obj = QUALITY -#instance_file = train.txt -#test_instance_file = test.txt -#execDir = ./ p = smac_2_06_01-dev/params.pcs rf_full_tree_bootstrap = False rf_split_min = 10 @@ -25,8 +13,6 @@ rf_split_min = 10 adaptive_capping = false max_incumbent_runs = 2000 num_iterations = 2147483647 -# No one actually cares about it... 
-runtime_limit = 2147483647 deterministic = True retry_target_algorithm_run_count = 0 diff --git a/optimizers/spearmint/spearmint_april2013_mod.py b/optimizers/spearmint/spearmint_april2013_mod.py index cc52564d..81e79d2c 100644 --- a/optimizers/spearmint/spearmint_april2013_mod.py +++ b/optimizers/spearmint/spearmint_april2013_mod.py @@ -60,7 +60,7 @@ def build_spearmint_call(config, options, optimizer_dir): print call = 'python ' + os.path.join(config.get('SPEARMINT', 'path_to_optimizer'), 'spearmint_sync.py') call = ' '.join([call, optimizer_dir, - '--config', config.get('SPEARMINT', 'config'), + '--config', os.path.join(optimizer_dir, os.path.basename(config.get('SPEARMINT', 'config'))), '--max-concurrent', config.get('HPOLIB', 'number_of_concurrent_jobs'), '--max-finished-jobs', config.get('SPEARMINT', 'max_finished_jobs'), '--polling-time', config.get('SPEARMINT', 'spearmint_polling_time'), @@ -103,7 +103,7 @@ def restore(config, optimizer_dir, **kwargs): #noinspection PyUnusedLocal -def main(config, options, experiment_dir, **kwargs): +def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs): # config: Loaded .cfg file # options: Options containing seed, restore_dir, # experiment_dir: Experiment directory/Benchmark_directory @@ -118,7 +118,9 @@ def main(config, options, experiment_dir, **kwargs): raise Exception("The restore directory does not exist") optimizer_dir = options.restore else: - optimizer_dir = os.path.join(experiment_dir, optimizer_str + "_" + + optimizer_dir = os.path.join(experiment_dir, + experiment_directory_prefix + + optimizer_str + "_" + str(options.seed) + "_" + time_string) # Build call @@ -128,10 +130,20 @@ def main(config, options, experiment_dir, **kwargs): if not os.path.exists(optimizer_dir): os.mkdir(optimizer_dir) # Make a link to the Protocol-Buffer config file - configpb = config.get('SPEARMINT', 'config') - if not os.path.exists(os.path.join(optimizer_dir, configpb)): - os.symlink(os.path.join(experiment_dir, optimizer_str, configpb), - os.path.join(optimizer_dir, configpb)) + space = config.get('SPEARMINT', 'config') + abs_space = os.path.abspath(space) + parent_space = os.path.join(experiment_dir, optimizer_str, space) + if os.path.exists(abs_space): + space = abs_space + elif os.path.exists(parent_space): + space = parent_space + else: + raise Exception("Spearmint search space not found. 
Searched at %s and " + "%s" % (abs_space, parent_space)) + # Copy the hyperopt search space + if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))): + os.symlink(os.path.join(experiment_dir, optimizer_str, space), + os.path.join(optimizer_dir, os.path.basename(space))) logger.info("### INFORMATION ################################################################") logger.info("# You're running %40s #" % path_to_optimizer) logger.info("%s" % version_info) diff --git a/optimizers/spearmint/spearmint_april2013_modDefault.cfg b/optimizers/spearmint/spearmint_april2013_modDefault.cfg index eed72c5a..505e8970 100644 --- a/optimizers/spearmint/spearmint_april2013_modDefault.cfg +++ b/optimizers/spearmint/spearmint_april2013_modDefault.cfg @@ -1,24 +1,15 @@ [SPEARMINT] -#Which spearmint script to call -script = python spearmint_sync.py # Which chooser to take (string) method = GPEIOptChooser #Arguments to pass to chooser module method_args = #Number of experiments in initial grid grid_size = 20000 -#The seed used to initialize initial grid (int) -grid_seed = 1 #Does not need to be specified, when experiment directory with a 'config.pb' is #present. For Syntax see 'config.bp' in braninpy directory config = config.pb -#Run in job_wrapper mode (bool) -wrapper = 0 #The time in_between successive polls for results (float) spearmint_polling_time = 3.0 -#General Parameters -max_concurrent = 1 -#max_finished_jobs = %(number_of_jobs)s # either relative to __file__ or absolute path_to_optimizer = ./spearmint_april2013_mod_src diff --git a/optimizers/spearmint/spearmint_gitfork_mod.py b/optimizers/spearmint/spearmint_gitfork_mod.py new file mode 100644 index 00000000..7b7592ef --- /dev/null +++ b/optimizers/spearmint/spearmint_gitfork_mod.py @@ -0,0 +1,127 @@ +## +# wrapping: A program making it easy to use hyperparameter +# optimization software. +# Copyright (C) 2013 Katharina Eggensperger and Matthias Feurer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import cPickle +import logging +import os +import sys + +import numpy as np + +import HPOlib.wrapping_util + + +__authors__ = ["Katharina Eggensperger", "Matthias Feurer"] +__contact__ = "automl.org" + + +logger = logging.getLogger("HPOlib.spearmint_april2013_mod") + + +path_to_optimizer = "optimizers/spearmint_march2014_mod/" +version_info = ("# %76s #\n" % "git version march 2014") + + +def check_dependencies(): + try: + import google.protobuf + try: + from google.protobuf.internal import enum_type_wrapper + except ImportError: + raise ImportError("Installed google.protobuf version is too old, you need at least 2.5.0") + except ImportError: + raise ImportError("Google protobuf cannot be imported. Are you sure " + "it's installed?") + try: + import numpy + except ImportError: + raise ImportError("Numpy cannot be imported. Are you sure that it's" + " installed?") + try: + import scipy + except ImportError: + raise ImportError("Scipy cannot be imported. 
Are you sure that it's" + " installed?") + + +def build_spearmint_call(config, options, optimizer_dir): + os.environ['PYTHONPATH'] = os.path.join(config.get('SPEARMINT', 'path_to_optimizer'), 'spearmint') + \ + os.pathsep + os.environ['PYTHONPATH'] + print os.environ['PYTHONPATH'] + call = 'python ' + \ + os.path.join(config.get('SPEARMINT', 'path_to_optimizer'), 'spearmint', 'spearmint', 'main.py') + call = ' '.join([call, os.path.join(optimizer_dir, config.get('SPEARMINT', 'config')), + '--driver=local', + '--max-concurrent', config.get('HPOLIB', 'number_of_concurrent_jobs'), + '--max-finished-jobs', config.get('SPEARMINT', 'max_finished_jobs'), + '--polling-time', config.get('SPEARMINT', 'spearmint_polling_time'), + '--grid-size', config.get('SPEARMINT', 'grid_size'), + '--method', config.get('SPEARMINT', 'method'), + '--method-args=' + config.get('SPEARMINT', 'method_args'), + '--grid-seed', str(options.seed)]) + if config.get('SPEARMINT', 'method') != "GPEIChooser" and \ + config.get('SPEARMINT', 'method') != "GPEIOptChooser": + logger.warning('WARNING: This chooser might not work yet\n') + call = ' '.join([call, config.get("SPEARMINT", 'method_args')]) + return call + + +#noinspection PyUnusedLocal +def restore(config, optimizer_dir, **kwargs): + raise NotImplementedError("Restoring is not possible for this optimizer") + + +#noinspection PyUnusedLocal +def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs): + # config: Loaded .cfg file + # options: Options containing seed, restore_dir, + # experiment_dir: Experiment directory/Benchmark_directory + # **kwargs: Nothing so far + + time_string = HPOlib.wrapping_util.get_time_string() + optimizer_str = os.path.splitext(os.path.basename(__file__))[0] + + # Find experiment directory + if options.restore: + if not os.path.exists(options.restore): + raise Exception("The restore directory does not exist") + optimizer_dir = options.restore + else: + optimizer_dir = os.path.join(experiment_dir, + experiment_directory_prefix + + optimizer_str + "_" + + str(options.seed) + "_" + time_string) + + # Build call + cmd = build_spearmint_call(config, options, optimizer_dir) + + # Set up experiment directory + if not os.path.exists(optimizer_dir): + os.mkdir(optimizer_dir) + # Make a link to the Protocol-Buffer config file + configpb = config.get('SPEARMINT', 'config') + if not os.path.exists(os.path.join(optimizer_dir, configpb)): + os.symlink(os.path.join(experiment_dir, optimizer_str, configpb), + os.path.join(optimizer_dir, configpb)) + logger.info("### INFORMATION ################################################################") + logger.info("# You're running %40s #" % path_to_optimizer) + logger.info("%s" % version_info) + logger.info("# A newer version might be available, but not yet built in. 
#") + logger.info("# Please use this version only to reproduce our results on automl.org #") + logger.info("################################################################################") + return cmd, optimizer_dir \ No newline at end of file diff --git a/optimizers/spearmint/spearmint_gitfork_modDefault.cfg b/optimizers/spearmint/spearmint_gitfork_modDefault.cfg new file mode 100644 index 00000000..85b8ef07 --- /dev/null +++ b/optimizers/spearmint/spearmint_gitfork_modDefault.cfg @@ -0,0 +1,24 @@ +[SPEARMINT] +#Which spearmint script to call +script = python spearmint_sync.py +# Which chooser to take (string) +method = GPEIOptChooser +#Arguments to pass to chooser module +method_args = +#Number of experiments in initial grid +grid_size = 20000 +#The seed used to initialize initial grid (int) +grid_seed = 1 +#Does not need to be specified, when experiment directory with a 'config.pb' is +#present. For Syntax see 'config.bp' in braninpy directory +config = config.pb +#Run in job_wrapper mode (bool) +wrapper = 0 +#The time in_between successive polls for results (float) +spearmint_polling_time = 3.0 +#General Parameters +max_concurrent = 1 +#max_finished_jobs = %(number_of_jobs)s + +# either relative to __file__ or absolute +path_to_optimizer = ./spearmint_gitfork_mod_src diff --git a/optimizers/spearmint/spearmint_gitfork_mod_parser.py b/optimizers/spearmint/spearmint_gitfork_mod_parser.py new file mode 100644 index 00000000..85df4d02 --- /dev/null +++ b/optimizers/spearmint/spearmint_gitfork_mod_parser.py @@ -0,0 +1,49 @@ +## +# wrapping: A program making it easy to use hyperparameter +# optimization software. +# Copyright (C) 2013 Katharina Eggensperger and Matthias Feurer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import logging +import os +import sys + +logger = logging.getLogger("HPOlib.optimizers.spearmint.spearmint_april2013_mod_parser") + + +def manipulate_config(config): + # special cases + if not config.has_option('SPEARMINT', 'method'): + raise Exception("SPEARMINT:method not specified in .cfg") + if not config.has_option('SPEARMINT', 'method_args'): + raise Exception("SPEARMINT:method-args not specified in .cfg") + + # GENERAL + if not config.has_option('SPEARMINT', 'max_finished_jobs'): + config.set('SPEARMINT', 'max_finished_jobs', + config.get('HPOLIB', 'number_of_jobs')) + + path_to_optimizer = config.get('SPEARMINT', 'path_to_optimizer') + if not os.path.isabs(path_to_optimizer): + path_to_optimizer = os.path.join(os.path.dirname(os.path.realpath(__file__)), path_to_optimizer) + + path_to_optimizer = os.path.normpath(path_to_optimizer) + if not os.path.exists(path_to_optimizer): + logger.critical("Path to optimizer not found: %s" % path_to_optimizer) + sys.exit(1) + + config.set('SPEARMINT', 'path_to_optimizer', path_to_optimizer) + + return config diff --git a/optimizers/spearmint/spearmint_gitfork_mod_src b/optimizers/spearmint/spearmint_gitfork_mod_src new file mode 160000 index 00000000..365464bc --- /dev/null +++ b/optimizers/spearmint/spearmint_gitfork_mod_src @@ -0,0 +1 @@ +Subproject commit 365464bc602278f22e3a195be5fd10eff253b8db diff --git a/optimizers/tpe/hyperopt_august2013_mod.py b/optimizers/tpe/hyperopt_august2013_mod.py index bc5ee359..241838d0 100644 --- a/optimizers/tpe/hyperopt_august2013_mod.py +++ b/optimizers/tpe/hyperopt_august2013_mod.py @@ -69,7 +69,7 @@ def build_tpe_call(config, options, optimizer_dir): # For TPE we have to cd to the exp_dir call = "python " + os.path.dirname(os.path.realpath(__file__)) + \ "/tpecall.py" - call = ' '.join([call, '-p', config.get('TPE', 'space'), + call = ' '.join([call, '-p', os.path.join(optimizer_dir, os.path.basename(config.get('TPE', 'space'))), "-m", config.get('TPE', 'number_evals'), "-s", str(options.seed), "--cwd", optimizer_dir]) @@ -105,7 +105,7 @@ def restore(config, optimizer_dir, **kwargs): #noinspection PyUnusedLocal -def main(config, options, experiment_dir, **kwargs): +def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs): # config: Loaded .cfg file # options: Options containing seed, restore_dir, # experiment_dir: Experiment directory/Benchmark_directory @@ -144,7 +144,9 @@ def main(config, options, experiment_dir, **kwargs): raise Exception("The restore directory does not exist") optimizer_dir = options.restore else: - optimizer_dir = os.path.join(experiment_dir, optimizer_str + "_" + + optimizer_dir = os.path.join(experiment_dir, + experiment_directory_prefix + + optimizer_str + "_" + str(options.seed) + "_" + time_string) @@ -155,10 +157,19 @@ def main(config, options, experiment_dir, **kwargs): if not os.path.exists(optimizer_dir): os.mkdir(optimizer_dir) space = config.get('TPE', 'space') + abs_space = os.path.abspath(space) + parent_space = os.path.join(experiment_dir, optimizer_str, space) + if os.path.exists(abs_space): + space = abs_space + elif os.path.exists(parent_space): + space = parent_space + else: + raise Exception("TPE search space not found. 
Searched at %s and " + "%s" % (abs_space, parent_space)) # Copy the hyperopt search space - if not os.path.exists(os.path.join(optimizer_dir, space)): + if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))): os.symlink(os.path.join(experiment_dir, optimizer_str, space), - os.path.join(optimizer_dir, space)) + os.path.join(optimizer_dir, os.path.basename(space))) import hyperopt path_to_loaded_optimizer = os.path.abspath(os.path.dirname(os.path.dirname(hyperopt.__file__))) diff --git a/optimizers/tpe/hyperopt_august2013_modDefault.cfg b/optimizers/tpe/hyperopt_august2013_modDefault.cfg index 09cde416..b7a5e8ee 100644 --- a/optimizers/tpe/hyperopt_august2013_modDefault.cfg +++ b/optimizers/tpe/hyperopt_august2013_modDefault.cfg @@ -1,7 +1,5 @@ [TPE] space = space.py -#exp_dir = %(exp_directory)s/tpe/ -#number_evals = %(number_of_jobs)s # either relative to __file__ or absolute path_to_optimizer = ./hyperopt_august2013_mod_src \ No newline at end of file diff --git a/optimizers/tpe/random_hyperopt_august2013_mod.py b/optimizers/tpe/random_hyperopt_august2013_mod.py index 1d35ebcf..f5a16bca 100644 --- a/optimizers/tpe/random_hyperopt_august2013_mod.py +++ b/optimizers/tpe/random_hyperopt_august2013_mod.py @@ -73,7 +73,7 @@ def check_dependencies(): def build_random_call(config, options, optimizer_dir): call = "python " + os.path.dirname(os.path.realpath(__file__)) + \ "/tpecall.py" - call = ' '.join([call, '-p', config.get('TPE', 'space'), + call = ' '.join([call, '-p', os.path.join(optimizer_dir, os.path.basename(config.get('TPE', 'space'))), "-m", config.get('TPE', 'number_evals'), "-s", str(options.seed), "--cwd", optimizer_dir, "--random"]) @@ -104,7 +104,7 @@ def restore(config, optimizer_dir, **kwargs): # noinspection PyUnusedLocal -def main(config, options, experiment_dir, **kwargs): +def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs): # config: Loaded .cfg file # options: Options containing seed, restore, # experiment_dir: Experiment directory/Benchmarkdirectory @@ -126,7 +126,9 @@ def main(config, options, experiment_dir, **kwargs): raise Exception("The restore directory does not exist") optimizer_dir = options.restore else: - optimizer_dir = os.path.join(experiment_dir, optimizer_str + "_" + + optimizer_dir = os.path.join(experiment_dir, + experiment_directory_prefix + + optimizer_str + "_" + str(options.seed) + "_" + time_string) @@ -137,10 +139,19 @@ def main(config, options, experiment_dir, **kwargs): if not os.path.exists(optimizer_dir): os.mkdir(optimizer_dir) space = config.get('TPE', 'space') + abs_space = os.path.abspath(space) + parent_space = os.path.join(experiment_dir, optimizer_str, space) + if os.path.exists(abs_space): + space = abs_space + elif os.path.exists(parent_space): + space = parent_space + else: + raise Exception("TPE search space not found. 
Searched at %s and " + "%s" % (abs_space, parent_space)) # Copy the hyperopt search space - if not os.path.exists(os.path.join(optimizer_dir, space)): + if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))): os.symlink(os.path.join(experiment_dir, optimizer_str, space), - os.path.join(optimizer_dir, space)) + os.path.join(optimizer_dir, os.path.basename(space))) import hyperopt path_to_loaded_optimizer = os.path.abspath(os.path.dirname(os.path.dirname(hyperopt.__file__))) diff --git a/scripts/HPOlib-convert b/scripts/HPOlib-convert old mode 100644 new mode 100755 diff --git a/scripts/HPOlib-plot b/scripts/HPOlib-plot old mode 100644 new mode 100755 diff --git a/setup.py b/setup.py index 13a9f8e2..982fc248 100644 --- a/setup.py +++ b/setup.py @@ -251,7 +251,8 @@ def run(self): 'networkx', 'protobuf', 'scipy>=0.13.2', - 'pymongo' + 'pymongo', + 'psutil' ], author_email='eggenspk@informatik.uni-freiburg.de', description=desc, diff --git a/tests/unittests/test_wrapping_util.py b/tests/unittests/test_wrapping_util.py index 2d927149..fd2b2517 100644 --- a/tests/unittests/test_wrapping_util.py +++ b/tests/unittests/test_wrapping_util.py @@ -76,7 +76,9 @@ def test_save_config_to_file_ignore_none(self): self.assertEqual(asserted_file_content, file_content) string_stream.close() - def test_use_option_parser_with_config(self): + def test_parse_config_values_from_unknown_arguments(self): + """Test if we can convert a config with Sections and variables into an + argparser.""" sys.argv = ['wrapping.py', '-s', '1', '-t', 'DBNet', '-o', 'SMAC', '--HPOLIB:number_of_jobs', '2'] args, unknown = wrapping.use_arg_parser() @@ -85,7 +87,26 @@ def test_use_option_parser_with_config(self): config.read("dummy_config.cfg") config_args = wrapping_util.parse_config_values_from_unknown_arguments( unknown, config) - self.assertEqual(vars(config_args)['HPOLIB:number_of_jobs'], '2') + self.assertListEqual(vars(config_args)['HPOLIB:number_of_jobs'], ['2']) + self.assertIs(vars(config_args)['GRIDSEARCH:params'], None) + self.assertIs(vars(config_args)['HPOLIB:function'], None) + self.assertIs(vars(config_args)['HPOLIB:result_on_terminate'], None) + + def test_parse_config_values_from_unknown_arguments2(self): + """Test if we can convert a config with Sections and variables into an + argparser. Test for arguments with whitespaces""" + sys.argv = ['wrapping.py', '-s', '1', '-t', 'DBNet', '-o', 'SMAC', + '--HPOLIB:function', 'python', '../branin.py'] + args, unknown = wrapping.use_arg_parser() + self.assertEqual(len(unknown), 3) + config = ConfigParser.SafeConfigParser(allow_no_value=True) + config.read("dummy_config.cfg") + config_args = wrapping_util.parse_config_values_from_unknown_arguments( + unknown, config) + self.assertListEqual(vars(config_args)['HPOLIB:function'], ['python', + '../branin.py']) + self.assertIs(vars(config_args)['GRIDSEARCH:params'], None) + self.assertIs(vars(config_args)['HPOLIB:result_on_terminate'], None) def test_nan_mean(self): self.assertEqual(wrapping_util.nan_mean(np.array([1, 5])), 3)