Merge branch 'development'

automl · Mar 25, 2014 · 738acd3 · 738acd3
2 parents f7a910a + 0da085e
commit 738acd3
Show file tree

Hide file tree

Showing 143 changed files with 11,238 additions and 3,781 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,16 @@
 *.py[cod]
 
+# Optimizers
+HPOlib/optimizers/smac_2_06_01-dev/*
+HPOlib/optimizers/spearmint_april2013_mod/*
+HPOlib/optimizers/hyperopt_august2013_mod/*
+
+# Runsolver
+runsolver/src/*
+
+# benchmark runs
+HPOlib/benchmarks/*/*_*/*
+
 # C extensions
 *.so
 
@@ -34,3 +45,9 @@ nosetests.xml
 .mr.developer.cfg
 .project
 .pydevproject
+
+# pycharm
+.idea
+
+# Others
+*~
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,16 @@
+language: python
+python:
+  - "2.7"
+# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+
+before_install:
+  - sudo apt-get install -q libatlas3gf-base libatlas-dev liblapack-dev gfortran
+
+install:
+  - easy_install -U distribute
+  - pip install numpy
+  - pip install scipy==0.13.2
+  - pip install matplotlib
+  - python setup.py install
+# command to run tests, e.g. python setup.py test
+script:  python setup.py test
diff --git a/Experiment.py → HPOlib/Experiment.py b/Experiment.py → HPOlib/Experiment.py
@@ -17,18 +17,26 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import cPickle
+import logging
 import os
 import scipy
+from scipy.stats.distributions import wrapcauchy_gen
 import sys
 import tempfile
 
 import numpy as np
 
-import Locker
+import HPOlib.Locker as Locker
+import HPOlib.wrapping_util as wrapping_util
+
 
 __authors__ = ["Katharina Eggensperger", "Matthias Feurer"]
 __contact__ = "automl.org"
 
+
+logger = logging.getLogger("HPOlib.experiment")
+
+
 CANDIDATE_STATE = 0
 INCOMPLETE_STATE = 1
 RUNNING_STATE = 2
@@ -48,10 +56,10 @@ def __init__(self, expt_dir, expt_name, max_wallclock_time=
         self.locker = Locker.Locker()
 
         # Only one process at a time is allowed to have access to this.
-        sys.stderr.write("Waiting to lock experiments file " +
-                         self.jobs_pkl + "...")
+        #logger.info("Waiting to lock experiments file " +
+        #                 self.jobs_pkl + "...")
         self.locker.lock_wait(self.jobs_pkl)
-        sys.stderr.write("...acquired\n")
+        #logger.info("...acquired\n")
 
         # Does this exist already?
         if not os.path.exists(self.jobs_pkl):
@@ -118,7 +126,8 @@ def create_trial(self):
     def __del__(self):
         self._save_jobs()
         if self.locker.unlock(self.jobs_pkl):
-            sys.stderr.write("Released lock on job grid.\n")
+            pass
+            #    sys.stderr.write("Released lock on job grid.\n")
         else:
             raise Exception("Could not release lock on job grid.\n")
 
@@ -158,20 +167,36 @@ def get_complete_jobs(self):
     def get_broken_jobs(self):
         return np.nonzero(self.status_array() == BROKEN_STATE)[0]
 
-    # Get the best value so far
-    def get_best(self):
-        best = 0
+    # Get the job id of the best value so far. If a trial has no finite
+    # result, this method consults its instance_results instead. If more
+    # than one trial has the same response value, the first trial is
+    # considered to be the best. If no trial with a better response value
+    # than sys.maxint is found, a ValueError is raised.
+    # TODO: add a way to exclude incomplete jobs from consideration
+    def get_arg_best(self):
+        best_idx = -1
+        best_value = sys.maxint
         for i, trial in enumerate(self.trials):
-            res = np.NaN
-            if trial['result'] == trial['result']:
-                res = trial['result']
+            tmp_res = np.NaN
+            if np.isfinite(trial['result']):
+                tmp_res = trial['result']
             elif np.isfinite(trial['instance_results']).any():
-                res = scipy.nanmean(trial['instance_results'])
+                tmp_res = wrapping_util.nan_mean(trial['instance_results'])
+                # np.nanmean is not available in older numpy versions
+                # tmp_res = scipy.nanmean(trial['instance_results'])
             else:
                 continue
-            if res < self.trials[best]:
-                best = i
-        return self.trials[best]
+            if tmp_res < best_value:
+                best_idx = i
+                best_value = tmp_res
+        if best_idx == -1:
+            raise ValueError("No best value found.")
+        return best_idx
+
+    # Get the best value so far, for more documentation see get_arg_best
+    def get_best(self):
+        best_idx = self.get_arg_best()
+        return self.trials[best_idx]
 
     def get_trial_from_id(self, _id):
         return self.trials[_id]
@@ -203,8 +228,8 @@ def set_one_fold_crashed(self, _id, fold, result, duration):
         self.trials[_id]['instance_status'][fold] = BROKEN_STATE
         self.trials[_id]['instance_durations'][fold] = duration
         self.trials[_id]['instance_results'][fold] = result
-        if (self.get_trial_from_id(_id)['instance_status'] != RUNNING_STATE).all():
-            self.get_trial_from_id(_id)['status'] = INCOMPLETE_STATE
+        if (self.trials[_id]['instance_status'] != RUNNING_STATE).all():
+            self.trials[_id]['status'] = INCOMPLETE_STATE
         self.check_cv_finished(_id)
         self.total_wallclock_time += duration
         self._sanity_check()
@@ -218,8 +243,8 @@ def set_one_fold_complete(self, _id, fold, result, duration):
         self.get_trial_from_id(_id)['instance_status'][fold] = COMPLETE_STATE
         self.get_trial_from_id(_id)['instance_durations'][fold] = duration
         # Set to incomplete if no job is running
-        if (self.get_trial_from_id(_id)['instance_status'] != RUNNING_STATE).all():
-            self.get_trial_from_id(_id)['status'] = INCOMPLETE_STATE
+        if (self.trials[_id]['instance_status'] != RUNNING_STATE).all():
+            self.trials[_id]['status'] = INCOMPLETE_STATE
         # Check if all runs are finished
         self.check_cv_finished(_id)
         self.total_wallclock_time += duration
@@ -243,9 +268,13 @@ def check_cv_finished(self, _id):
                 self.get_trial_from_id(_id)['status'] = BROKEN_STATE
             else:
                 self.get_trial_from_id(_id)['status'] = COMPLETE_STATE
-            self.get_trial_from_id(_id)['result'] = np.sum(self.get_trial_from_id(_id)['instance_results']) / self.folds
-            self.get_trial_from_id(_id)['std'] = np.std(self.get_trial_from_id(_id)['instance_results'])
-            self.get_trial_from_id(_id)['duration'] = np.sum(self.get_trial_from_id(_id)['instance_durations'])
+            self.get_trial_from_id(_id)['result'] = \
+                np.sum(self.get_trial_from_id(_id)['instance_results'])\
+                / self.folds
+            self.get_trial_from_id(_id)['std'] =\
+                np.std(self.get_trial_from_id(_id)['instance_results'])
+            self.get_trial_from_id(_id)['duration'] =\
+                np.sum(self.get_trial_from_id(_id)['instance_durations'])
             return True
         else:
             return False
@@ -254,13 +283,13 @@ def check_cv_finished(self, _id):
     # parameters. Useful to delete all unnecessary entries after a crash in order
     # to restart
     def remove_all_but_first_runs(self, restored_runs):
-        print "#########Restored runs", restored_runs
-        print self.instance_order, len(self.instance_order)
+        logger.info("Restored runs %d", restored_runs)
+        logger.info("%s %s" ,self.instance_order, len(self.instance_order))
         if len(self.instance_order) == restored_runs:
             pass
         else:
             for _id, instance in self.instance_order[-1:restored_runs - 1:-1]:
-                print "Deleting", _id, instance
+                logger.info("Deleting %d %d", _id, instance)
                 if np.isfinite(self.trials[_id]['instance_durations'][instance]):
                     self.total_wallclock_time -= \
                         self.trials[_id]['instance_durations'][instance]
@@ -326,12 +355,13 @@ def _sanity_check(self):
             # Backwards compability with numpy 1.6
             wallclock_time = np.nansum(trial['instance_durations'])
             total_wallclock_time += wallclock_time if np.isfinite(wallclock_time) else 0
-        assert (total_wallclock_time == self.total_wallclock_time), \
+        assert (wrapping_util.float_eq(total_wallclock_time,
+                                       self.total_wallclock_time)), \
             (total_wallclock_time, self.total_wallclock_time)
 
     # Automatically loads this object from a pickle file
     def _load_jobs(self):
-        fh   = open(self.jobs_pkl, 'r')
+        fh = open(self.jobs_pkl, 'r')
         jobs = cPickle.load(fh)
         fh.close()
 
@@ -368,4 +398,4 @@ def _save_jobs(self):
                        'trials'               : self.trials}, fh)
         fh.close()
         cmd = 'mv "%s" "%s"' % (fh.name, self.jobs_pkl)
-        os.system(cmd)  # TODO: Replace with subprocess modules
+        os.system(cmd)  # TODO: Replace with subprocess modules
diff --git a/Locker.py → HPOlib/Locker.py b/Locker.py → HPOlib/Locker.py
@@ -19,12 +19,16 @@
 # 
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+import logging
 import os
-import sys
 import time
 
+
+logger = logging.getLogger("HPOlib.locker")
+
+
 def safe_delete(filename):
-    cmd  = 'mv "%s" "%s.delete" && rm "%s.delete"' % (filename, filename, 
+    cmd = 'mv "%s" "%s.delete" && rm "%s.delete"' % (filename, filename,
                                                       filename)
     fail = os.system(cmd)
     return not fail
@@ -44,21 +48,20 @@ def lock(self, filename):
             self.locks[filename] += 1
             return True
         else:
-            cmd = 'ln -s /dev/null "%s.lock" 2> /dev/null' % (filename)
+            cmd = 'ln -s /dev/null "%s.lock" 2> /dev/null' % filename
             fail = os.system(cmd)
             if not fail:
                 self.locks[filename] = 1
             return not fail
 
     def unlock(self, filename):
         if not self.locks.has_key(filename):
-            sys.stderr.write("Trying to unlock not-locked file %s.\n" % 
-                             (filename))
+            logger.info("Trying to unlock not-locked file %s.\n", filename)
             return True
         if self.locks[filename] == 1:
             success = safe_delete('%s.lock' % (filename))
             if not success:
-                sys.stderr.write("Could not unlock file: %s.\n" % (filename))
+                logger.log("Could not unlock file: %s.\n", filename)
             del self.locks[filename]
             return success
         else:

diff --git a/HPOlib/Plotting/__init__.py b/HPOlib/Plotting/__init__.py
@@ -0,0 +1 @@
+