Skip to content

Commit

Permalink
Merge branch 'development'
Browse files Browse the repository at this point in the history
  • Loading branch information
KEggensperger committed Mar 25, 2014
2 parents f7a910a + 0da085e commit 738acd3
Show file tree
Hide file tree
Showing 143 changed files with 11,238 additions and 3,781 deletions.
17 changes: 17 additions & 0 deletions .gitignore
@@ -1,5 +1,16 @@
*.py[cod]

# Optimizers
HPOlib/optimizers/smac_2_06_01-dev/*
HPOlib/optimizers/spearmint_april2013_mod/*
HPOlib/optimizers/hyperopt_august2013_mod/*

# Runsolver
runsolver/src/*

# benchmark runs
HPOlib/benchmarks/*/*_*/*

# C extensions
*.so

Expand Down Expand Up @@ -34,3 +45,9 @@ nosetests.xml
.mr.developer.cfg
.project
.pydevproject

# pycharm
.idea

# Others
*~
16 changes: 16 additions & 0 deletions .travis.yml
@@ -0,0 +1,16 @@
language: python
python:
- "2.7"
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors

before_install:
- sudo apt-get install -q libatlas3gf-base libatlas-dev liblapack-dev gfortran

install:
- easy_install -U distribute
- pip install numpy
- pip install scipy==0.13.2
- pip install matplotlib
- python setup.py install
# command to run tests, e.g. python setup.py test
script: python setup.py test
86 changes: 58 additions & 28 deletions Experiment.py → HPOlib/Experiment.py
Expand Up @@ -17,18 +17,26 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import cPickle
import logging
import os
import scipy
from scipy.stats.distributions import wrapcauchy_gen
import sys
import tempfile

import numpy as np

import Locker
import HPOlib.Locker as Locker
import HPOlib.wrapping_util as wrapping_util


__authors__ = ["Katharina Eggensperger", "Matthias Feurer"]
__contact__ = "automl.org"


logger = logging.getLogger("HPOlib.experiment")


CANDIDATE_STATE = 0
INCOMPLETE_STATE = 1
RUNNING_STATE = 2
Expand All @@ -48,10 +56,10 @@ def __init__(self, expt_dir, expt_name, max_wallclock_time=
self.locker = Locker.Locker()

# Only one process at a time is allowed to have access to this.
sys.stderr.write("Waiting to lock experiments file " +
self.jobs_pkl + "...")
#logger.info("Waiting to lock experiments file " +
# self.jobs_pkl + "...")
self.locker.lock_wait(self.jobs_pkl)
sys.stderr.write("...acquired\n")
#logger.info("...acquired\n")

# Does this exist already?
if not os.path.exists(self.jobs_pkl):
Expand Down Expand Up @@ -118,7 +126,8 @@ def create_trial(self):
def __del__(self):
self._save_jobs()
if self.locker.unlock(self.jobs_pkl):
sys.stderr.write("Released lock on job grid.\n")
pass
# sys.stderr.write("Released lock on job grid.\n")
else:
raise Exception("Could not release lock on job grid.\n")

Expand Down Expand Up @@ -158,20 +167,36 @@ def get_complete_jobs(self):
def get_broken_jobs(self):
return np.nonzero(self.status_array() == BROKEN_STATE)[0]

# Get the best value so far
def get_best(self):
best = 0
# Get the job id of the trial with the best value so far. If a trial has no
# result available, this method consults its instance_results instead. If
# more than one trial has the same response value, the first such trial is
# considered to be the best. If no trial with a better response value
# than sys.maxint is found, a ValueError is raised.
# TODO: add an option so that incomplete jobs are not considered
def get_arg_best(self):
best_idx = -1
best_value = sys.maxint
for i, trial in enumerate(self.trials):
res = np.NaN
if trial['result'] == trial['result']:
res = trial['result']
tmp_res = np.NaN
if np.isfinite(trial['result']):
tmp_res = trial['result']
elif np.isfinite(trial['instance_results']).any():
res = scipy.nanmean(trial['instance_results'])
tmp_res = wrapping_util.nan_mean(trial['instance_results'])
# np.nanmean is not available in older numpy versions
# tmp_res = scipy.nanmean(trial['instance_results'])
else:
continue
if res < self.trials[best]:
best = i
return self.trials[best]
if tmp_res < best_value:
best_idx = i
best_value = tmp_res
if best_idx == -1:
raise ValueError("No best value found.")
return best_idx

# Get the best value so far, for more documentation see get_arg_best
def get_best(self):
best_idx = self.get_arg_best()
return self.trials[best_idx]

def get_trial_from_id(self, _id):
return self.trials[_id]
Expand Down Expand Up @@ -203,8 +228,8 @@ def set_one_fold_crashed(self, _id, fold, result, duration):
self.trials[_id]['instance_status'][fold] = BROKEN_STATE
self.trials[_id]['instance_durations'][fold] = duration
self.trials[_id]['instance_results'][fold] = result
if (self.get_trial_from_id(_id)['instance_status'] != RUNNING_STATE).all():
self.get_trial_from_id(_id)['status'] = INCOMPLETE_STATE
if (self.trials[_id]['instance_status'] != RUNNING_STATE).all():
self.trials[_id]['status'] = INCOMPLETE_STATE
self.check_cv_finished(_id)
self.total_wallclock_time += duration
self._sanity_check()
Expand All @@ -218,8 +243,8 @@ def set_one_fold_complete(self, _id, fold, result, duration):
self.get_trial_from_id(_id)['instance_status'][fold] = COMPLETE_STATE
self.get_trial_from_id(_id)['instance_durations'][fold] = duration
# Set to incomplete if no job is running
if (self.get_trial_from_id(_id)['instance_status'] != RUNNING_STATE).all():
self.get_trial_from_id(_id)['status'] = INCOMPLETE_STATE
if (self.trials[_id]['instance_status'] != RUNNING_STATE).all():
self.trials[_id]['status'] = INCOMPLETE_STATE
# Check if all runs are finished
self.check_cv_finished(_id)
self.total_wallclock_time += duration
Expand All @@ -243,9 +268,13 @@ def check_cv_finished(self, _id):
self.get_trial_from_id(_id)['status'] = BROKEN_STATE
else:
self.get_trial_from_id(_id)['status'] = COMPLETE_STATE
self.get_trial_from_id(_id)['result'] = np.sum(self.get_trial_from_id(_id)['instance_results']) / self.folds
self.get_trial_from_id(_id)['std'] = np.std(self.get_trial_from_id(_id)['instance_results'])
self.get_trial_from_id(_id)['duration'] = np.sum(self.get_trial_from_id(_id)['instance_durations'])
self.get_trial_from_id(_id)['result'] = \
np.sum(self.get_trial_from_id(_id)['instance_results'])\
/ self.folds
self.get_trial_from_id(_id)['std'] =\
np.std(self.get_trial_from_id(_id)['instance_results'])
self.get_trial_from_id(_id)['duration'] =\
np.sum(self.get_trial_from_id(_id)['instance_durations'])
return True
else:
return False
Expand All @@ -254,13 +283,13 @@ def check_cv_finished(self, _id):
# parameters. Useful to delete all unnecessary entries after a crash in order
# to restart
def remove_all_but_first_runs(self, restored_runs):
print "#########Restored runs", restored_runs
print self.instance_order, len(self.instance_order)
logger.info("Restored runs %d", restored_runs)
logger.info("%s %s" ,self.instance_order, len(self.instance_order))
if len(self.instance_order) == restored_runs:
pass
else:
for _id, instance in self.instance_order[-1:restored_runs - 1:-1]:
print "Deleting", _id, instance
logger.info("Deleting %d %d", _id, instance)
if np.isfinite(self.trials[_id]['instance_durations'][instance]):
self.total_wallclock_time -= \
self.trials[_id]['instance_durations'][instance]
Expand Down Expand Up @@ -326,12 +355,13 @@ def _sanity_check(self):
# Backwards compatibility with numpy 1.6
wallclock_time = np.nansum(trial['instance_durations'])
total_wallclock_time += wallclock_time if np.isfinite(wallclock_time) else 0
assert (total_wallclock_time == self.total_wallclock_time), \
assert (wrapping_util.float_eq(total_wallclock_time,
self.total_wallclock_time)), \
(total_wallclock_time, self.total_wallclock_time)

# Automatically loads this object from a pickle file
def _load_jobs(self):
fh = open(self.jobs_pkl, 'r')
fh = open(self.jobs_pkl, 'r')
jobs = cPickle.load(fh)
fh.close()

Expand Down Expand Up @@ -368,4 +398,4 @@ def _save_jobs(self):
'trials' : self.trials}, fh)
fh.close()
cmd = 'mv "%s" "%s"' % (fh.name, self.jobs_pkl)
os.system(cmd) # TODO: Replace with subprocess modules
os.system(cmd) # TODO: Replace with subprocess modules
15 changes: 9 additions & 6 deletions Locker.py → HPOlib/Locker.py
Expand Up @@ -19,12 +19,16 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import logging
import os
import sys
import time


logger = logging.getLogger("HPOlib.locker")


def safe_delete(filename):
cmd = 'mv "%s" "%s.delete" && rm "%s.delete"' % (filename, filename,
cmd = 'mv "%s" "%s.delete" && rm "%s.delete"' % (filename, filename,
filename)
fail = os.system(cmd)
return not fail
Expand All @@ -44,21 +48,20 @@ def lock(self, filename):
self.locks[filename] += 1
return True
else:
cmd = 'ln -s /dev/null "%s.lock" 2> /dev/null' % (filename)
cmd = 'ln -s /dev/null "%s.lock" 2> /dev/null' % filename
fail = os.system(cmd)
if not fail:
self.locks[filename] = 1
return not fail

def unlock(self, filename):
if not self.locks.has_key(filename):
sys.stderr.write("Trying to unlock not-locked file %s.\n" %
(filename))
logger.info("Trying to unlock not-locked file %s.\n", filename)
return True
if self.locks[filename] == 1:
success = safe_delete('%s.lock' % (filename))
if not success:
sys.stderr.write("Could not unlock file: %s.\n" % (filename))
logger.log("Could not unlock file: %s.\n", filename)
del self.locks[filename]
return success
else:
Expand Down
1 change: 1 addition & 0 deletions HPOlib/Plotting/__init__.py
@@ -0,0 +1 @@

0 comments on commit 738acd3

Please sign in to comment.