From 271b775e72d2b2aedbda901bfad541a4f5aa487f Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Fri, 22 Feb 2019 18:18:18 +0200 Subject: [PATCH 01/26] edited ParamGrid.py to avoid conflict in v.0.5 --- talos/parameters/ParamGrid.py | 51 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py index ab77b322..91e28daa 100644 --- a/talos/parameters/ParamGrid.py +++ b/talos/parameters/ParamGrid.py @@ -1,7 +1,8 @@ -from numpy import arange, unique, array, column_stack, random +import numpy as np from ..reducers.sample_reducer import sample_reducer + class ParamGrid: '''Suite for handling parameters internally within Talos @@ -19,12 +20,13 @@ def __init__(self, main_self): # convert the input to useful format self._p = self._param_input_conversion() + # create a list of lists, each list being a parameter sequence ls = [list(self._p[key]) for key in self._p.keys()] - virtual_grid_size = 1 - for l in ls: - virtual_grid_size *= len(l) + # get the number of total dimensions / permutations + virtual_grid_size = np.prod([len(l) for l in ls]) final_grid_size = virtual_grid_size + # calculate the size of the downsample if self.main_self.grid_downsample is not None: final_grid_size = int(virtual_grid_size * self.main_self.grid_downsample) @@ -40,36 +42,35 @@ def __init__(self, main_self): out = range(0, final_grid_size) # build the parameter permutation grid - self.param_grid = self._param_grid(ls, out) + self.param_grid = self._create_param_permutations(ls, out) # initialize with random shuffle if needed if self.main_self.shuffle: - random.shuffle(self.param_grid) + np.random.shuffle(self.param_grid) # create a index for logging purpose self.param_log = list(range(len(self.param_grid))) # add the log index to param grid - self.param_grid = column_stack((self.param_grid, self.param_log)) + self.param_grid = np.column_stack((self.param_grid, self.param_log)) - def _param_grid(self, ls, product_indices): + def _create_param_permutations(self, ls, permutation_index): - '''CREATE THE PARAMETER PERMUTATIONS + '''Expand params dictionary to permutations - This is done once before starting the experiment. - Takes in the parameter dictionary, and returns - every possible permutation in an array. + Takes the input params dictionary and expands it to + actual parameter permutations for the experiment. ''' - prod = [] - for i in product_indices: # the product indices are the output of our random function + final_grid = [] + for i in permutation_index: p = [] for l in reversed(ls): - i, s = divmod(int(i), len(l)) # NOTE i is updated shifting away the information for this parameter + i, s = divmod(int(i), len(l)) p.insert(0, l[s]) - prod.append(tuple(p)) + final_grid.append(tuple(p)) - _param_grid_out = array(prod, dtype='object') + _param_grid_out = np.array(final_grid, dtype='object') return _param_grid_out @@ -99,23 +100,21 @@ def _param_input_conversion(self): def _param_range(self, start, end, n): - '''PARAMETER RANGE - - Deals with the format where a start, end - and steps values are given for a parameter - in a tuple format. + '''Deal with ranged inputs in params dictionary - This is called internally from param_format() + A helper function to handle the cases where params + dictionary input is in the format (start, end, steps) + and is called internally through ParamGrid(). 
''' try: - out = arange(start, end, (end - start) / n, dtype=float) + out = np.arange(start, end, (end - start) / n, dtype=float) # this is for python2 except ZeroDivisionError: - out = arange(start, end, (end - start) / float(n), dtype=float) + out = np.arange(start, end, (end - start) / float(n), dtype=float) if type(start) == int and type(end) == int: out = out.astype(int) - out = unique(out) + out = np.unique(out) return out From e03b1da510463be2b74dbbffc633ac7973da3869 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Fri, 22 Feb 2019 18:19:43 +0200 Subject: [PATCH 02/26] major namespace cleanup and experimental automl tools 1) experimental automl capabilities: - added Params() for generating parameter dictionaries - added KerasModel() for generating network architectures 2) namespace cleaning: The namespace now consists only of actionable/useful items. The top level consists of commands, which are all classes and so can be identified by their camel-case names; utils and templates lead to second-level items. Check it out for yourself to learn more. Further, examples has a third level consisting of datasets, models, and pipelines. 3) fixes and cleanups - cleaned up the naming in ParamGrid - changed all the numpy imports to come from np - improved scan_object.data string-to-numeric conversion 4) added new tests --- setup.py | 4 +- talos/__init__.py | 38 +++-- talos/commands/kerasmodel.py | 108 ++++++++++++++ talos/commands/params.py | 189 +++++++++++++++++++++++++ talos/examples/__init__.py | 5 +- talos/examples/datasets.py | 27 ++-- talos/examples/models.py | 68 +++++---- talos/examples/params.py | 15 +- talos/examples/pipelines.py | 35 ++--- talos/metrics/keras_metrics.py | 11 +- talos/parameters/ParamGrid.py | 50 ++++--- talos/scan/Scan.py | 2 +- talos/utils/__init__.py | 10 ++ talos/utils/string_cols_to_numeric.py | 5 +- test/core_tests/test_auto_scan.py | 21 +++ test/core_tests/test_params_object.py | 43 ++++++ test/core_tests/test_random_methods.py | 4 +- test/core_tests/test_scan.py | 35 ++--- test/core_tests/test_scan_object.py | 4 +- test_script.py | 6 +- 20 files changed, 537 insertions(+), 143 deletions(-) create mode 100644 talos/commands/kerasmodel.py create mode 100644 talos/commands/params.py create mode 100644 test/core_tests/test_auto_scan.py create mode 100644 test/core_tests/test_params_object.py diff --git a/setup.py b/setup.py index a11fc79b..7ce22d89 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ DESCRIPTION = "Talos Hyperparameter Tuning for Keras" LONG_DESCRIPTION = """\ Talos radically changes the ordinary Keras workflow by -fullyautomating hyperparameter tuning and model evaluation. +fully automating hyperparameter tuning and model evaluation. Talos exposes Keras functionality entirely and there is no new syntax or templates to learn. """ @@ -16,7 +16,7 @@ URL = 'http://autonom.io' LICENSE = 'MIT' DOWNLOAD_URL = 'https://github.com/autonomio/talos/' -VERSION = '0.4.9' +VERSION = '0.5.0' try: from setuptools import setup diff --git a/talos/__init__.py b/talos/__init__.py index cd70b010..1df2c8e0 100755 --- a/talos/__init__.py +++ b/talos/__init__.py @@ -6,20 +6,36 @@ from .commands.evaluate import Evaluate from .commands.restore import Restore from .commands.autom8 import Autom8 +from .commands.params import Params +from .commands.kerasmodel import KerasModel +from . import utils +from . 
import examples as templates -# other internal imports -from .examples import datasets, params +# the purpose of everything below is to keep the namespace completely clean -# external append imports -import sklearn.metrics as performance +del_from_utils = ['best_model', 'connection_check', 'detector', + 'exceptions', 'last_neuron', 'load_model', 'validation_split', + 'pred_class', 'results', 'string_cols_to_numeric'] -from .utils.connection_check import is_connected +for key in del_from_utils: + if key.startswith('__') is False: + delattr(utils, key) -if is_connected() is True: - import astetik as plots -else: - print("NO INTERNET CONNECTION: Reporting plots will not work.") +template_sub = [templates.datasets, + templates.models, + templates.params, + templates.pipelines] -from kerasplotlib import TrainingLog as live +keep_from_templates = ['iris', 'cervical_cancer', 'titanic', 'breast_cancer', + 'icu_mortality'] -__version__ = "0.4.9" +for sub in template_sub: + for key in list(sub.__dict__): + if key.startswith('__') is False: + if key not in keep_from_templates: + delattr(sub, key) + +del commands, parameters, scan, reducers, model, metrics, key, del_from_utils +del examples, sub, keep_from_templates, template_sub + +__version__ = "0.5.0" diff --git a/talos/commands/kerasmodel.py b/talos/commands/kerasmodel.py new file mode 100644 index 00000000..1b7060f9 --- /dev/null +++ b/talos/commands/kerasmodel.py @@ -0,0 +1,108 @@ +import numpy as np + +from talos.model.layers import hidden_layers +from talos.model.normalizers import lr_normalizer + +from keras.models import Sequential +from keras.layers import Dropout, Flatten +from keras.layers import LSTM, Conv1D, SimpleRNN, Dense + +try: + from wrangle.reshape_to_conv1d import reshape_to_conv1d as array_reshape_conv1d +except ImportError: + from wrangle import array_reshape_conv1d + + +class KerasModel: + + def __init__(self): + + '''An input model for Scan(). Optimized for being used together with + Params(). For example: + + Scan(x=x, y=y, params=Params().params, model=KerasModel().model) + + NOTE: the grid from Params() is very large, so grid_downsample or + round_limit accordingly in Scan(). 
+ + ''' + + self.model = self._create_input_model + + def _create_input_model(self, x_train, y_train, x_val, y_val, params): + + model = Sequential() + + if params['network'] == 'conv1d': + x, model = _add_conv1d(x_train, model, params['first_neuron'], x_train.shape[1]) + + if params['network'] == 'lstm': + x, model = _add_lstm(x_train, model, params['first_neuron']) + + if params['network'] == 'simplernn': + x, model = _add_simplernn(x_train, model, params['first_neuron']) + + if params['network'] == 'dense': + model.add(Dense(params['first_neuron'], + input_dim=x_train.shape[1], + activation='relu')) + + model.add(Dropout(params['dropout'])) + + # add hidden layers to the model + hidden_layers(model, params, 1) + + # output layer (this heuristic for picking last_neuron is sketchy) + try: + last_neuron = y_train.shape[1] + except IndexError: + if len(np.unique(y_train)) == 2: + last_neuron = 1 + else: + last_neuron = len(np.unique(y_train)) + + model.add(Dense(last_neuron, + activation=params['last_activation'])) + + # bundle the optimizer with learning rate changes + optimizer = params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])) + + # compile the model + model.compile(optimizer=optimizer, + loss=params['loss'], + metrics=['acc']) + + # fit the model + out = model.fit(x_train, y_train, + batch_size=params['batch_size'], + epochs=params['epochs'], + verbose=0, + validation_data=[x_val, y_val]) + + # pass the output to Talos + return out, model + + +def _add_conv1d(x, model, filters, kernel_size): + + x = array_reshape_conv1d(x) + model.add(Conv1D(filters, kernel_size)) + model.add(Flatten()) + + return x, model + + +def _add_lstm(x, model, units): + + x = array_reshape_conv1d(x) + model.add(LSTM(units)) + + return x, model + + +def _add_simplernn(x, model, units): + + x = array_reshape_conv1d(x) + model.add(SimpleRNN(units)) + + return x, model diff --git a/talos/commands/params.py b/talos/commands/params.py new file mode 100644 index 00000000..53be6ab6 --- /dev/null +++ b/talos/commands/params.py @@ -0,0 +1,189 @@ +import numpy as np +from keras.optimizers import Adam, Nadam, Adadelta, SGD + + +loss = {'binary': ['binary_crossentropy', 'logcosh'], + 'multi_class': ['sparse_categorical_crossentropy'], + 'multi_label': ['categorical_crossentropy'], + 'continuous': ['mae']} + +last_activation = {'binary': ['sigmoid'], + 'multi_class': ['softmax'], + 'multi_label': ['softmax'], + 'continuous': [None]} + + +class Params: + + def __init__(self, + params=None, + task='binary', + replace=True, + auto=True, + network=True): + + '''A facility for generating or appending a params dictionary. + + params : dict or None + task : str + 'binary', 'multi_class', 'multi_label', or 'continuous' + replace : bool + Replace current dictionary entries with new ones. + auto : bool + Automatically generate or append the params dictionary with + all available parameters. + network : bool + Adds several network architectures as parameters. This is to be + used as an input together with KerasModel(). If False then only + 'dense' will be added. 
+ ''' + + self.task = task + self.replace = replace + self.network = network + + if params is None: + self.params = {} + else: + self.params = params + + if auto: + self.automated() + + def automated(self, shapes='fixed'): + + '''Automatically generate a comprehensive + parameter dict to be used in Scan() + + shapes : string + Either 'fixed' or 'sloped' + + ''' + + if shapes == 'fixed': + self.shapes() + else: + self.shapes_slope() + self.layers() + self.dropout() + self.optimizers() + self.activations() + self.neurons() + self.losses() + self.batch_size() + self.epochs() + self.kernel_initializers() + self.lr() + if self.network: + self.networks() + else: + self.params['network'] = 'dense' + self.last_activations() + + def shapes(self): + + '''Uses triangle, funnel, and brick shapes.''' + + self._append_params('shapes', ['triangle', 'funnel', 'brick']) + + def shapes_slope(self): + + '''Uses a single decimal float for values below 0.5 to + reduce the width of the following layer.''' + + self._append_params('shapes', np.arange(0, .6, 0.1).tolist()) + + def layers(self, max_layers=6): + + self._append_params('hidden_layers', list(range(max_layers))) + + def dropout(self): + + '''Dropout from 0.0 to 0.75''' + + self._append_params('dropout', np.round(np.arange(0, .85, 0.1), 2).tolist()) + + def optimizers(self, task='binary'): + + '''Adam, Nadam, SGD, and adadelta.''' + self._append_params('optimizer', [Adam, Nadam, Adadelta, SGD]) + + def activations(self): + + self._append_params('activation', ['relu', 'elu']) + + def losses(self): + + self._append_params('loss', loss[self.task]) + + def neurons(self, bottom_value=8, max_value=None, steps=None): + + '''max_value and steps has to be either None or + integer value at the same time.''' + + if max_value is None and steps is None: + values = [int(np.exp2(i)) for i in range(3, 11)] + else: + values = range(bottom_value, max_value, steps) + + self._append_params('first_neuron', values) + + def batch_size(self, bottom_value=8, max_value=None, steps=None): + + '''max_value and steps has to be either None or + integer value at the same time.''' + + if max_value is None and steps is None: + values = [int(np.exp2(i/2)) for i in range(3, 15)] + else: + values = range(bottom_value, max_value, steps) + + self._append_params('batch_size', values) + + def epochs(self, bottom_value=50, max_value=None, steps=None): + + '''max_value and steps has to be either None or + integer value at the same time.''' + + if max_value is None and steps is None: + values = [int(np.exp2(i/2))+50 for i in range(3, 15)] + else: + values = range(bottom_value, max_value, steps) + + self._append_params('epochs', values) + + def kernel_initializers(self): + + self._append_params('kernel_initializer', + ['glorot_uniform', 'glorot_normal', + 'random_uniform', 'random_normal']) + + def lr(self): + + a = np.round(np.arange(0.01, 0.2, 0.02), 3).tolist() + b = np.round(np.arange(0, 1, 0.2), 2).tolist() + c = list(range(0, 11)) + + self._append_params('lr', a + b + c) + + def networks(self): + + '''Adds four different network architectures are parameters: + dense, simplernn, lstm, conv1d.''' + + self._append_params('network', ['dense', 'simplernn', 'lstm', 'conv1d']) + + def last_activations(self): + + self._append_params('last_activation', last_activation[self.task]) + + def _append_params(self, label, values): + + if self.replace is False: + try: + self.params[label] + except KeyError: + self.params[label] = values + + else: + self.params[label] = values diff --git 
a/talos/examples/__init__.py b/talos/examples/__init__.py index 8b137891..979a12f5 100644 --- a/talos/examples/__init__.py +++ b/talos/examples/__init__.py @@ -1 +1,4 @@ - +from . import datasets +from . import models +from . import params +from . import pipelines diff --git a/talos/examples/datasets.py b/talos/examples/datasets.py index 740d1183..706a101d 100755 --- a/talos/examples/datasets.py +++ b/talos/examples/datasets.py @@ -1,22 +1,11 @@ -import pandas as pd -from numpy import nan -from keras.utils import to_categorical - - -base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/' - - -def limit_rows(data, samples): - - return data.sample(frac=1).head(samples) - - def icu_mortality(samples=None): + import pandas as pd + base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/' df = pd.read_csv(base + 'icu_mortality.csv') df = df.dropna(thresh=3580, axis=1) df = df.dropna() - df = limit_rows(df, samples) + df = df.sample(frac=1).head(samples) y = df['hospitalmortality'].astype(int).values x = df.drop('hospitalmortality', axis=1).values @@ -25,6 +14,8 @@ def icu_mortality(samples=None): def titanic(): + import pandas as pd + base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/' df = pd.read_csv(base + 'titanic.csv') y = df.survived.values @@ -46,6 +37,9 @@ def titanic(): def iris(): + import pandas as pd + from keras.utils import to_categorical + base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/' df = pd.read_csv(base + 'iris.csv') df['species'] = df['species'].factorize()[0] df = df.sample(len(df)) @@ -60,6 +54,9 @@ def iris(): def cervical_cancer(): + import pandas as pd + from numpy import nan + base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/' df = pd.read_csv(base + 'cervical_cancer.csv') df = df.replace('?', nan) df = df.drop(['citology', 'hinselmann', 'biopsy'], axis=1) @@ -76,6 +73,8 @@ def cervical_cancer(): def breast_cancer(): + import pandas as pd + base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/' df = pd.read_csv(base + 'breast_cancer.csv') # then some minimal data cleanup diff --git a/talos/examples/models.py b/talos/examples/models.py index 7c2ceefa..cbd68a88 100644 --- a/talos/examples/models.py +++ b/talos/examples/models.py @@ -1,44 +1,43 @@ #!/usr/bin/env python -from talos.model import lr_normalizer, early_stopper, hidden_layers -from keras.models import Sequential -from keras.layers import Dropout, Dense +def titanic(x_train, y_train, x_val, y_val, params): -from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc -from talos.metrics.keras_metrics import recall_acc, fmeasure_acc + from keras.models import Sequential + from keras.layers import Dropout, Dense + # note how instead of passing the value, we pass a dictionary entry + model = Sequential() + model.add(Dense(params['first_neuron'], + input_dim=x_train.shape[1], + activation='relu')) -def titanic_model(x_train, y_train, x_val, y_val, params): - - # note how instead of passing the value, we pass a dictionary entry - model = Sequential() - model.add(Dense(params['first_neuron'], - input_dim=x_train.shape[1], - activation='relu')) + # same here, just passing a dictionary entry + model.add(Dropout(params['dropout'])) - # same here, just passing a dictionary entry - model.add(Dropout(params['dropout'])) + # again, instead of the activation name, we have a dictionary entry + 
model.add(Dense(1, activation=params['last_activation'])) - # again, instead of the activation name, we have a dictionary entry - model.add(Dense(1, activation=params['last_activation'])) + # here are using a learning rate boundary + model.compile(optimizer=params['optimizer'], + loss=params['losses'], + metrics=['acc']) - # here are using a learning rate boundary - model.compile(optimizer=params['optimizer'], - loss=params['losses'], - metrics=['acc']) + # here we are also using the early_stopper function for a callback + out = model.fit(x_train, y_train, + batch_size=params['batch_size'], + epochs=2, + verbose=0, + validation_data=[x_val, y_val]) - # here we are also using the early_stopper function for a callback - out = model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=2, - verbose=0, - validation_data=[x_val, y_val]) + return out, model - return out, model +def iris(x_train, y_train, x_val, y_val, params): -def iris_model(x_train, y_train, x_val, y_val, params): + from keras.models import Sequential + from keras.layers import Dropout, Dense + from talos.model import lr_normalizer, early_stopper, hidden_layers # note how instead of passing the value, we pass a dictionary entry model = Sequential() @@ -74,7 +73,14 @@ def iris_model(x_train, y_train, x_val, y_val, params): return out, model -def cervix_model(x_train, y_train, x_val, y_val, params): +def cervical_cancer(x_train, y_train, x_val, y_val, params): + + from keras.models import Sequential + from keras.layers import Dropout, Dense + from talos.model import lr_normalizer, early_stopper, hidden_layers + + from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc + from talos.metrics.keras_metrics import recall_acc, fmeasure_acc model = Sequential() model.add(Dense(params['first_neuron'], @@ -103,7 +109,7 @@ def cervix_model(x_train, y_train, x_val, y_val, params): verbose=0, validation_data=[x_val, y_val], callbacks=[early_stopper(params['epochs'], - mode='moderate', - monitor='val_fmeasure')]) + mode='moderate', + monitor='val_fmeasure')]) return results, model diff --git a/talos/examples/params.py b/talos/examples/params.py index 2a9e6d0d..5d334b62 100644 --- a/talos/examples/params.py +++ b/talos/examples/params.py @@ -1,9 +1,4 @@ -from keras.optimizers import Adam, Nadam, RMSprop -from keras.activations import relu, elu, softmax, sigmoid -from keras.losses import logcosh, binary_crossentropy, categorical_crossentropy - - -def titanic_params(): +def titanic(): # here use a standard 2d dictionary for inputting the param boundaries p = {'lr': (0.5, 5, 10), @@ -20,6 +15,10 @@ def titanic_params(): def iris(): + from keras.optimizers import Adam, Nadam + from keras.losses import logcosh, categorical_crossentropy + from keras.activations import relu, elu, softmax + # here use a standard 2d dictionary for inputting the param boundaries p = {'lr': (0.5, 5, 10), 'first_neuron': [4, 8, 16, 32, 64], @@ -41,6 +40,10 @@ def iris(): def breast_cancer(): + from keras.optimizers import Adam, Nadam, RMSprop + from keras.losses import logcosh, binary_crossentropy + from keras.activations import relu, elu, sigmoid + # then we can go ahead and set the parameter space p = {'lr': (0.5, 5, 10), 'first_neuron': [4, 8, 16, 32, 64], diff --git a/talos/examples/pipelines.py b/talos/examples/pipelines.py index e597b95b..aa476684 100644 --- a/talos/examples/pipelines.py +++ b/talos/examples/pipelines.py @@ -1,35 +1,24 @@ -import talos as ta - -from talos.examples.datasets import titanic -from 
talos.examples.params import titanic_params -from talos.examples.models import titanic_model - -from talos.examples.datasets import iris -from talos.examples.models import iris_model -from talos.examples.params import iris as iris_params - - -def titanic_pipeline(round_limit=2, random_method='uniform_mersenne'): +def titanic(round_limit=2, random_method='uniform_mersenne'): '''Performs a Scan with Iris dataset and simple dense net''' - - scan_object = ta.Scan(titanic()[0][:50], - titanic()[1][:50], - titanic_params(), - titanic_model, + import talos as ta + scan_object = ta.Scan(ta.templates.datasets.titanic()[0][:50], + ta.templates.datasets.titanic()[1][:50], + ta.templates.params.titanic(), + ta.templates.models.titanic, round_limit=round_limit) return scan_object -def iris_pipeline(round_limit=5, random_method='uniform_mersenne'): +def iris(round_limit=5, random_method='uniform_mersenne'): '''Performs a Scan with Iris dataset and simple dense net''' - - scan_object = ta.Scan(iris()[0], - iris()[1], - iris_params(), - iris_model, + import talos as ta + scan_object = ta.Scan(ta.templates.datasets.iris()[0], + ta.templates.datasets.iris()[1], + ta.templates.params.iris(), + ta.templates.models.iris, round_limit=round_limit) return scan_object diff --git a/talos/metrics/keras_metrics.py b/talos/metrics/keras_metrics.py index cecf2bb9..5ce7f32a 100644 --- a/talos/metrics/keras_metrics.py +++ b/talos/metrics/keras_metrics.py @@ -1,7 +1,8 @@ -from keras import backend as K + def root_mean_squared_error(y_true, y_pred): + from keras import backend as K return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) @@ -10,7 +11,7 @@ def matthews_correlation_acc(y_true, y_pred): '''Calculates the Matthews correlation coefficient measure for quality of binary classification problems. ''' - + from keras import backend as K y_pred_pos = K.round(K.clip(y_pred, 0, 1)) y_pred_neg = 1 - y_pred_pos @@ -34,7 +35,7 @@ def precision_acc(y_true, y_pred): '''Calculates the precision, a metric for multi-label classification of how many selected items are relevant. ''' - + from keras import backend as K true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1))) precision = true_positives / (predicted_positives + K.epsilon()) @@ -46,7 +47,7 @@ def recall_acc(y_true, y_pred): '''Calculates the recall, a metric for multi-label classification of how many relevant items are selected. ''' - + from keras import backend as K true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) possible_positives = K.sum(K.round(K.clip(y_true, 0, 1))) recall = true_positives / (possible_positives + K.epsilon()) @@ -67,7 +68,7 @@ def fbeta_score_acc(y_true, y_pred, beta=1): correct classes becomes more important, and with beta > 1 the metric is instead weighted towards penalizing incorrect class assignments. 
''' - + from keras import backend as K if beta < 0: raise ValueError('The lowest choosable beta is zero (only precision).') diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py index ebac6fd4..32fae091 100644 --- a/talos/parameters/ParamGrid.py +++ b/talos/parameters/ParamGrid.py @@ -1,4 +1,4 @@ -from numpy import arange, unique, array, column_stack +import numpy as np from ..reducers.sample_reducer import sample_reducer @@ -22,12 +22,13 @@ def __init__(self, main_self): # convert the input to useful format self._p = self._param_input_conversion() + # create a list of lists, each list being a parameter sequence ls = [list(self._p[key]) for key in self._p.keys()] - virtual_grid_size = 1 - for l in ls: - virtual_grid_size *= len(l) + # get the number of total dimensions / permutations + virtual_grid_size = np.prod([len(l) for l in ls]) final_grid_size = virtual_grid_size + # calculate the size of the downsample if self.main_self.grid_downsample is not None: final_grid_size = int(virtual_grid_size * self.main_self.grid_downsample) @@ -43,36 +44,35 @@ def __init__(self, main_self): out = range(0, final_grid_size) # build the parameter permutation grid - self.param_grid = self._param_grid(ls, out) + self.param_grid = self._create_param_permutations(ls, out) # initialize with random shuffle if needed if self.main_self.shuffle: - random.shuffle(self.param_grid) + np.random.shuffle(self.param_grid) # create a index for logging purpose self.param_log = list(range(len(self.param_grid))) # add the log index to param grid - self.param_grid = column_stack((self.param_grid, self.param_log)) + self.param_grid = np.column_stack((self.param_grid, self.param_log)) - def _param_grid(self, ls, product_indices): + def _create_param_permutations(self, ls, permutation_index): - '''CREATE THE PARAMETER PERMUTATIONS + '''Expand params dictionary to permutations - This is done once before starting the experiment. - Takes in the parameter dictionary, and returns - every possible permutation in an array. + Takes the input params dictionary and expands it to + actual parameter permutations for the experiment. ''' - prod = [] - for i in product_indices: # the product indices are the output of our random function + final_grid = [] + for i in permutation_index: p = [] for l in reversed(ls): - i, s = divmod(int(i), len(l)) # NOTE i is updated shifting away the information for this parameter + i, s = divmod(int(i), len(l)) p.insert(0, l[s]) - prod.append(tuple(p)) + final_grid.append(tuple(p)) - _param_grid_out = array(prod, dtype='object') + _param_grid_out = np.array(final_grid, dtype='object') return _param_grid_out @@ -102,23 +102,21 @@ def _param_input_conversion(self): def _param_range(self, start, end, n): - '''PARAMETER RANGE - - Deals with the format where a start, end - and steps values are given for a parameter - in a tuple format. + '''Deal with ranged inputs in params dictionary - This is called internally from param_format() + A helper function to handle the cases where params + dictionary input is in the format (start, end, steps) + and is called internally through ParamGrid(). 
''' try: - out = arange(start, end, (end - start) / n, dtype=float) + out = np.arange(start, end, (end - start) / n, dtype=float) # this is for python2 except ZeroDivisionError: - out = arange(start, end, (end - start) / float(n), dtype=float) + out = np.arange(start, end, (end - start) / float(n), dtype=float) if type(start) == int and type(end) == int: out = out.astype(int) - out = unique(out) + out = np.unique(out) return out diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py index 37705707..a78ef88a 100755 --- a/talos/scan/Scan.py +++ b/talos/scan/Scan.py @@ -66,7 +66,7 @@ class Scan: the history and compiled model. val_split : float, optional The proportion of the input `x` which is set aside as the - cross-validation data. (Default is 0.3). + validation data. (Default is 0.3). shuffle : bool, optional If True, shuffle the data in x and y before splitting into the train and cross-validation datasets. (Default is True). diff --git a/talos/utils/__init__.py b/talos/utils/__init__.py index 8b137891..adf69eb2 100644 --- a/talos/utils/__init__.py +++ b/talos/utils/__init__.py @@ -1 +1,11 @@ +# In this init we load everything under utils in the Talos namespace +try: + from kerasplotlib import TrainingLog as live +except ImportError: + print('Matplotlib backend loading failed') + +from ..model.normalizers import lr_normalizer +from ..model.layers import hidden_layers +from ..model.early_stopper import early_stopper +import talos.metrics.keras_metrics as metrics diff --git a/talos/utils/string_cols_to_numeric.py b/talos/utils/string_cols_to_numeric.py index 930db859..766b14dc 100644 --- a/talos/utils/string_cols_to_numeric.py +++ b/talos/utils/string_cols_to_numeric.py @@ -25,7 +25,10 @@ def string_cols_to_numeric(data, destructive=False): try: data[col] = data[col].astype(int) except ValueError: - data[col] = data[col].astype(float) + try: + data[col] = data[col].astype(float) + except ValueError: + data[col] = data[col] else: data[col] = data[col] diff --git a/test/core_tests/test_auto_scan.py b/test/core_tests/test_auto_scan.py new file mode 100644 index 00000000..2be1c1f6 --- /dev/null +++ b/test/core_tests/test_auto_scan.py @@ -0,0 +1,21 @@ +import talos as ta + + +def test_auto_scan(): + + '''Tests the object from Params()''' + + print('Start auto Scan()...') + + x, y = ta.templates.datasets.breast_cancer() + x = x[:50] + y = y[:50] + + p = ta.Params().params + + for key in p.keys(): + p[key] = [p[key][0]] + + ta.Scan(x, y, p, ta.KerasModel().model) + + return "Finished testing auto Scan()" diff --git a/test/core_tests/test_params_object.py b/test/core_tests/test_params_object.py new file mode 100644 index 00000000..324ae4df --- /dev/null +++ b/test/core_tests/test_params_object.py @@ -0,0 +1,43 @@ +import talos as ta + + +def test_params_object(): + + '''Tests the object from Params()''' + + print('Start testing Params object...') + + p = ta.Params() + + # without arguments + + p.activations() + p.batch_size() + p.dropout() + p.epochs() + p.kernel_initializers() + p.layers() + p.neurons() + p.lr() + p.optimizers() + p.shapes() + p.shapes_slope() + p.automated() + + p = ta.Params(replace=False) + + # with arguments + p.activations() + p.batch_size(10, 100, 5) + p.dropout() + p.epochs(10, 100, 5) + p.kernel_initializers() + p.layers(12) + p.neurons(10, 100, 5) + p.lr() + p.optimizers('multi_label') + p.shapes() + p.shapes_slope() + p.automated('sloped') + + return "Finished testing Params object!" 
diff --git a/test/core_tests/test_random_methods.py b/test/core_tests/test_random_methods.py index 564070f5..79b55e04 100644 --- a/test/core_tests/test_random_methods.py +++ b/test/core_tests/test_random_methods.py @@ -1,4 +1,4 @@ -from talos.examples.pipelines import titanic_pipeline +import talos as ta def test_random_methods(): @@ -21,6 +21,6 @@ def test_random_methods(): 'ambience'] for method in random_methods: - titanic_pipeline(random_method=method) + ta.templates.pipelines.titanic(random_method=method) return "Finished testing random methods!" diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index 4e79bc3d..08d705d2 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -12,8 +12,9 @@ from talos.scan.Scan import Scan from talos.commands.reporting import Reporting -from talos.examples.models import iris_model, cervix_model -from talos import datasets, params + +import talos as ta + p1 = {'lr': [1], 'first_neuron': [4], @@ -61,26 +62,26 @@ class TestIris: def __init__(self): - self.x, self.y = datasets.iris() + self.x, self.y = ta.templates.datasets.iris() self.x_train, self.x_dev, self.y_train, self.y_dev \ = train_test_split(self.x, self.y, test_size=0.2) def test_scan_iris_1(self): print("Running Iris dataset test 1...") Scan(self.x, self.y, params=p1, dataset_name='testing', - experiment_no='000', model=iris_model) + experiment_no='000', model=ta.templates.models.iris) def test_scan_iris_2(self): print("Running Iris dataset test 2...") Scan(self.x, self.y, params=p2, dataset_name='testing', - experiment_no='000', model=iris_model, + experiment_no='000', model=ta.templates.models.iris, last_epoch_value=True) def test_scan_iris_explicit_validation_set(self): print("Running explicit validation dataset test with metric reduction") Scan(self.x_train, self.y_train, params=p2, dataset_name='testing', - experiment_no='000', model=iris_model, + experiment_no='000', model=ta.templates.models.iris, x_val=self.x_dev, y_val=self.y_dev) def test_scan_iris_explicit_validation_set_force_fail(self): @@ -88,7 +89,7 @@ def test_scan_iris_explicit_validation_set_force_fail(self): try: Scan(self.x_train, self.y_train, params=p2, dataset_name='testing', - experiment_no='000', model=iris_model, + experiment_no='000', model=ta.templates.models.iris, y_val=self.y_dev) except RuntimeError: pass @@ -97,8 +98,8 @@ def test_scan_iris_explicit_validation_set_force_fail(self): class TestCancer: def __init__(self): - self.x, self.y = datasets.cervical_cancer() - self.model = cervix_model + self.x, self.y = ta.templates.datasets.cervical_cancer() + self.model = ta.templates.models.cervical_cancer def test_scan_cancer_metric_reduction(self): print("Running Cervical Cancer dataset test...") @@ -160,11 +161,11 @@ class TestLoadDatasets: def __init__(self): print("Testing Load Datasets...") - x = datasets.icu_mortality() - x = datasets.icu_mortality(100) - x = datasets.titanic() - x = datasets.iris() - x = datasets.cervical_cancer() - x = datasets.breast_cancer() - x = params.iris() - x = params.breast_cancer() # noqa + x = ta.templates.datasets.icu_mortality() + x = ta.templates.datasets.icu_mortality(100) + x = ta.templates.datasets.titanic() + x = ta.templates.datasets.iris() + x = ta.templates.datasets.cervical_cancer() + x = ta.templates.datasets.breast_cancer() + x = ta.templates.params.iris() + x = ta.templates.params.breast_cancer() # noqa diff --git a/test/core_tests/test_scan_object.py b/test/core_tests/test_scan_object.py index d2e60024..a777868e 100644 --- 
a/test/core_tests/test_scan_object.py +++ b/test/core_tests/test_scan_object.py @@ -1,5 +1,5 @@ # first load the pipeline -from talos.examples.pipelines import iris_pipeline +import talos as ta def test_scan_object(): @@ -7,7 +7,7 @@ def test_scan_object(): print("Running Scan object test...") # the create the test based on it - scan_object = iris_pipeline() + scan_object = ta.templates.pipelines.iris() keras_model = scan_object.best_model() scan_object.evaluate_models(x_val=scan_object.x, y_val=scan_object.y) diff --git a/test_script.py b/test_script.py index 2165b293..93e1b6cb 100644 --- a/test_script.py +++ b/test_script.py @@ -8,6 +8,9 @@ from test.core_tests.test_scan_object import test_scan_object from test.core_tests.test_reporting_object import test_reporting_object from test.core_tests.test_random_methods import test_random_methods +from test.core_tests.test_params_object import test_params_object +from test.core_tests.test_auto_scan import test_auto_scan + from talos.utils.generator import generator from talos.utils.gpu_utils import force_cpu @@ -21,6 +24,8 @@ # Reporting test_reporting_object(scan_object) + test_params_object() + test_auto_scan() start_time = str(time.strftime("%s")) @@ -34,7 +39,6 @@ ta.Restore(start_time + '.zip') test_random_methods() - fit_generator = generator(scan_object.x, scan_object.y, 20) force_cpu() From aa51c8d61ff981fdb9e4f1580165aa7dd5989194 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Fri, 22 Feb 2019 21:51:36 +0200 Subject: [PATCH 03/26] 4 complete template sets and small fixes - added models and params under ta.templates - now cervical cancer, breast cancer, iris, and titanic have dataset, params, and input model - network_shape() now return [0] if params['hidden_layers'] is 0. This permits the situation where 0 is one option in the experiment - comprehensive functionality tests for templates added --- talos/examples/models.py | 41 +++++++++++++++++++++++++++++++ talos/examples/params.py | 8 +++--- talos/model/network_shape.py | 4 +++ test/core_tests/test_templates.py | 33 +++++++++++++++++++++++++ test_script.py | 2 ++ 5 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 test/core_tests/test_templates.py diff --git a/talos/examples/models.py b/talos/examples/models.py index cbd68a88..b063b346 100644 --- a/talos/examples/models.py +++ b/talos/examples/models.py @@ -73,6 +73,47 @@ def iris(x_train, y_train, x_val, y_val, params): return out, model +def breast_cancer(x_train, y_train, x_val, y_val, params): + + from keras.models import Sequential + from keras.layers import Dropout, Dense + from talos.model import lr_normalizer, early_stopper, hidden_layers + + from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc + from talos.metrics.keras_metrics import recall_acc, fmeasure_acc + + model = Sequential() + model.add(Dense(params['first_neuron'], + input_dim=x_train.shape[1], + activation='relu')) + + model.add(Dropout(params['dropout'])) + + hidden_layers(model, params, 1) + + model.add(Dense(1, activation=params['last_activation'])) + + model.compile(optimizer=params['optimizer'] + (lr=lr_normalizer(params['lr'], + params['optimizer'])), + loss=params['losses'], + metrics=['acc', + fmeasure_acc, + recall_acc, + precision_acc, + matthews_correlation_acc]) + + results = model.fit(x_train, y_train, + batch_size=params['batch_size'], + epochs=params['epochs'], + verbose=0, + validation_data=[x_val, y_val], + callbacks=[early_stopper(params['epochs'], + mode='moderate', + monitor='val_fmeasure')]) + + 
return results, model + def cervical_cancer(x_train, y_train, x_val, y_val, params): diff --git a/talos/examples/params.py b/talos/examples/params.py index 5d334b62..00b53c02 100644 --- a/talos/examples/params.py +++ b/talos/examples/params.py @@ -51,12 +51,14 @@ def breast_cancer(): 'batch_size': (2, 30, 10), 'epochs': [50, 100, 150], 'dropout': (0, 0.5, 5), - 'weight_regulizer': [None], - 'emb_output_dims': [None], - 'shape': ['brick', 'long_funnel'], + 'shapes': ['brick', 'triangle', 'funnel'], 'optimizer': [Adam, Nadam, RMSprop], 'losses': [logcosh, binary_crossentropy], 'activation': [relu, elu], 'last_activation': [sigmoid]} return p + + +def cervical_cancer(): + return breast_cancer() diff --git a/talos/model/network_shape.py b/talos/model/network_shape.py index c616a1d0..6f70052a 100644 --- a/talos/model/network_shape.py +++ b/talos/model/network_shape.py @@ -23,6 +23,10 @@ def network_shape(params, last_neuron): out = [] n = first_neuron + # the case where hidden_layers is zero + if layers == 0: + return [0] + # the cases where an angle is applied if isinstance(shape, float): diff --git a/test/core_tests/test_templates.py b/test/core_tests/test_templates.py new file mode 100644 index 00000000..eae443db --- /dev/null +++ b/test/core_tests/test_templates.py @@ -0,0 +1,33 @@ +def test_templates(): + + import talos as ta + + x, y = ta.templates.datasets.titanic() + x = x[:50] + y = y[:50] + model = ta.templates.models.titanic + p = ta.templates.params.titanic() + ta.Scan(x, y, p, model, round_limit=2) + + x, y = ta.templates.datasets.iris() + x = x[:50] + y = y[:50] + model = ta.templates.models.iris + p = ta.templates.params.iris() + ta.Scan(x, y, p, model, round_limit=2) + + x, y = ta.templates.datasets.cervical_cancer() + x = x[:50] + y = y[:50] + model = ta.templates.models.cervical_cancer + p = ta.templates.params.cervical_cancer() + ta.Scan(x, y, p, model, round_limit=2) + + x, y = ta.templates.datasets.breast_cancer() + x = x[:50] + y = y[:50] + model = ta.templates.models.breast_cancer + p = ta.templates.params.breast_cancer() + ta.Scan(x, y, p, model, round_limit=2) + + x, y = ta.templates.datasets.icu_mortality(50) diff --git a/test_script.py b/test_script.py index 93e1b6cb..cc176eab 100644 --- a/test_script.py +++ b/test_script.py @@ -10,6 +10,7 @@ from test.core_tests.test_random_methods import test_random_methods from test.core_tests.test_params_object import test_params_object from test.core_tests.test_auto_scan import test_auto_scan +from test.core_tests.test_templates import test_templates from talos.utils.generator import generator from talos.utils.gpu_utils import force_cpu @@ -26,6 +27,7 @@ test_reporting_object(scan_object) test_params_object() test_auto_scan() + test_templates() start_time = str(time.strftime("%s")) From 3ffabf6d9f40911a3974b1988b81efad2fcd5c60 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Fri, 22 Feb 2019 22:15:04 +0200 Subject: [PATCH 04/26] added templates.pipelines - pipeline added for breast cancer and cervical cancer - tests added for all pipeline templates --- talos/examples/pipelines.py | 38 ++++++++++++++++++++++++++----- test/core_tests/test_templates.py | 5 ++++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/talos/examples/pipelines.py b/talos/examples/pipelines.py index aa476684..0c802d46 100644 --- a/talos/examples/pipelines.py +++ b/talos/examples/pipelines.py @@ -1,17 +1,30 @@ -def titanic(round_limit=2, random_method='uniform_mersenne'): +def breast_cancer(round_limit=2, 
random_method='uniform_mersenne'): '''Performs a Scan with the breast cancer dataset and a simple dense net''' import talos as ta - scan_object = ta.Scan(ta.templates.datasets.titanic()[0][:50], - ta.templates.datasets.titanic()[1][:50], - ta.templates.params.titanic(), - ta.templates.models.titanic, + scan_object = ta.Scan(ta.templates.datasets.breast_cancer()[0], + ta.templates.datasets.breast_cancer()[1], + ta.templates.params.breast_cancer(), + ta.templates.models.breast_cancer, + round_limit=round_limit) + + return scan_object + + +def cervical_cancer(round_limit=2, random_method='uniform_mersenne'): + + '''Performs a Scan with the cervical cancer dataset and a simple dense net''' + import talos as ta + scan_object = ta.Scan(ta.templates.datasets.cervical_cancer()[0], + ta.templates.datasets.cervical_cancer()[1], + ta.templates.params.cervical_cancer(), + ta.templates.models.cervical_cancer, round_limit=round_limit) return scan_object -def iris(round_limit=5, random_method='uniform_mersenne'): +def iris(round_limit=2, random_method='uniform_mersenne'): '''Performs a Scan with the Iris dataset and a simple dense net''' import talos as ta scan_object = ta.Scan(ta.templates.datasets.iris()[0], ta.templates.datasets.iris()[1], ta.templates.params.iris(), ta.templates.models.iris, round_limit=round_limit) return scan_object + + +def titanic(round_limit=2, random_method='uniform_mersenne'): + + '''Performs a Scan with the Titanic dataset and a simple dense net''' + import talos as ta + scan_object = ta.Scan(ta.templates.datasets.titanic()[0][:50], + ta.templates.datasets.titanic()[1][:50], + ta.templates.params.titanic(), + ta.templates.models.titanic, + round_limit=round_limit) + + return scan_object diff --git a/test/core_tests/test_templates.py b/test/core_tests/test_templates.py index eae443db..32a9bbfd 100644 --- a/test/core_tests/test_templates.py +++ b/test/core_tests/test_templates.py @@ -31,3 +31,8 @@ def test_templates(): ta.Scan(x, y, p, model, round_limit=2) x, y = ta.templates.datasets.icu_mortality(50) + + ta.templates.pipelines.breast_cancer(random_method='quantum') + ta.templates.pipelines.cervical_cancer(random_method='sobol') + ta.templates.pipelines.iris(random_method='uniform_crypto') + ta.templates.pipelines.titanic(random_method='korobov_matrix') From 1bbb1d492c5a4975ebe341b79b1dce5022f9029d Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Sat, 23 Feb 2019 16:52:40 +0200 Subject: [PATCH 05/26] fixed KerasModel() and added new templates - fixed an issue with KerasModel() that affected conv1d, lstm, and simplernn - added Bidirectional LSTM to KerasModel() - added two new models to /templates - made scan_finish() dtype conversion more permissive of special cases and errors, as such columns should not be forcefully converted in any case (the main thing is that the metrics are always converted) - fixed ta.Params() which had 'loss' instead of 'losses' (which creates problems later in the results table) --- talos/commands/kerasmodel.py | 49 +++------ talos/commands/params.py | 8 +- talos/examples/models.py | 143 +++++++++++++------------- talos/utils/string_cols_to_numeric.py | 4 +- 4 files changed, 96 insertions(+), 108 deletions(-) diff --git a/talos/commands/kerasmodel.py b/talos/commands/kerasmodel.py index 1b7060f9..43c452f6 100644 --- a/talos/commands/kerasmodel.py +++ b/talos/commands/kerasmodel.py @@ -5,7 +5,7 @@ from keras.models import Sequential from keras.layers import Dropout, Flatten -from keras.layers import LSTM, Conv1D, SimpleRNN, Dense +from keras.layers import LSTM, Conv1D, SimpleRNN, Dense, Bidirectional try: - from wrangle.reshape_to_conv1d import reshape_to_conv1d as 
array_reshape_conv1d @@ -33,16 +33,24 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): model = Sequential() + if params['network'] != 'dense': + x_train = array_reshape_conv1d(x_train) + x_val = array_reshape_conv1d(x_val) + if params['network'] == 'conv1d': - x, model = _add_conv1d(x_train, model, params['first_neuron'], x_train.shape[1]) + model.add(Conv1D(params['first_neuron'], x_train.shape[1])) + model.add(Flatten()) + + elif params['network'] == 'lstm': + model.add(LSTM(params['first_neuron'])) - if params['network'] == 'lstm': - x, model = _add_lstm(x_train, model, params['first_neuron']) + if params['network'] == 'bidirectional_lstm': + model.add(Bidirectional(LSTM(params['first_neuron']))) - if params['network'] == 'simplernn': - x, model = _add_simplernn(x_train, model, params['first_neuron']) + elif params['network'] == 'simplernn': + model.add(SimpleRNN(params['first_neuron'])) - if params['network'] == 'dense': + elif params['network'] == 'dense': model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1], activation='relu')) @@ -69,7 +77,7 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): # compile the model model.compile(optimizer=optimizer, - loss=params['loss'], + loss=params['losses'], metrics=['acc']) # fit the model @@ -81,28 +89,3 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): # pass the output to Talos return out, model - - -def _add_conv1d(x, model, filters, kernel_size): - - x = array_reshape_conv1d(x) - model.add(Conv1D(filters, kernel_size)) - model.add(Flatten()) - - return x, model - - -def _add_lstm(x, model, units): - - x = array_reshape_conv1d(x) - model.add(LSTM(units)) - - return x, model - - -def _add_simplernn(x, model, units): - - x = array_reshape_conv1d(x) - model.add(SimpleRNN(units)) - - return x, model diff --git a/talos/commands/params.py b/talos/commands/params.py index 53be6ab6..662c1392 100644 --- a/talos/commands/params.py +++ b/talos/commands/params.py @@ -114,7 +114,7 @@ def activations(self): def losses(self): - self._append_params('loss', loss[self.task]) + self._append_params('losses', loss[self.task]) def neurons(self, bottom_value=8, max_value=None, steps=None): @@ -171,7 +171,11 @@ def networks(self): '''Adds four different network architectures are parameters: dense, simplernn, lstm, conv1d.''' - self._append_params('network', ['dense', 'simplernn', 'lstm', 'conv1d']) + self._append_params('network', ['dense', + 'simplernn', + 'lstm', + 'bidirectional_lstm', + 'conv1d']) def last_activations(self): diff --git a/talos/examples/models.py b/talos/examples/models.py index b063b346..0967bee8 100644 --- a/talos/examples/models.py +++ b/talos/examples/models.py @@ -1,79 +1,49 @@ #!/usr/bin/env python -def titanic(x_train, y_train, x_val, y_val, params): - - from keras.models import Sequential - from keras.layers import Dropout, Dense - - # note how instead of passing the value, we pass a dictionary entry - model = Sequential() - model.add(Dense(params['first_neuron'], - input_dim=x_train.shape[1], - activation='relu')) - - # same here, just passing a dictionary entry - model.add(Dropout(params['dropout'])) - - # again, instead of the activation name, we have a dictionary entry - model.add(Dense(1, activation=params['last_activation'])) - - # here are using a learning rate boundary - model.compile(optimizer=params['optimizer'], - loss=params['losses'], - metrics=['acc']) - - # here we are also using the early_stopper function for a callback - out = 
model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=2, - verbose=0, - validation_data=[x_val, y_val]) - - return out, model - - -def iris(x_train, y_train, x_val, y_val, params): +def breast_cancer(x_train, y_train, x_val, y_val, params): from keras.models import Sequential from keras.layers import Dropout, Dense from talos.model import lr_normalizer, early_stopper, hidden_layers - # note how instead of passing the value, we pass a dictionary entry + from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc + from talos.metrics.keras_metrics import recall_acc, fmeasure_acc + model = Sequential() model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1], activation='relu')) - # same here, just passing a dictionary entry model.add(Dropout(params['dropout'])) - # with this call we can create any number of hidden layers - hidden_layers(model, params, y_train.shape[1]) + hidden_layers(model, params, 1) - # again, instead of the activation name, we have a dictionary entry - model.add(Dense(y_train.shape[1], - activation=params['last_activation'])) + model.add(Dense(1, activation=params['last_activation'])) - # here are using a learning rate boundary model.compile(optimizer=params['optimizer'] (lr=lr_normalizer(params['lr'], params['optimizer'])), loss=params['losses'], - metrics=['acc']) + metrics=['acc', + fmeasure_acc, + recall_acc, + precision_acc, + matthews_correlation_acc]) - # here we are also using the early_stopper function for a callback - out = model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=params['epochs'], - verbose=0, - validation_data=[x_val, y_val], - callbacks=[early_stopper(params['epochs'], mode=[1, 1])]) + results = model.fit(x_train, y_train, + batch_size=params['batch_size'], + epochs=params['epochs'], + verbose=0, + validation_data=[x_val, y_val], + callbacks=[early_stopper(params['epochs'], + mode='moderate', + monitor='val_fmeasure')]) - return out, model + return results, model -def breast_cancer(x_train, y_train, x_val, y_val, params): +def cervical_cancer(x_train, y_train, x_val, y_val, params): from keras.models import Sequential from keras.layers import Dropout, Dense @@ -114,43 +84,74 @@ def breast_cancer(x_train, y_train, x_val, y_val, params): return results, model -def cervical_cancer(x_train, y_train, x_val, y_val, params): + +def titanic(x_train, y_train, x_val, y_val, params): from keras.models import Sequential from keras.layers import Dropout, Dense - from talos.model import lr_normalizer, early_stopper, hidden_layers - from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc - from talos.metrics.keras_metrics import recall_acc, fmeasure_acc + # note how instead of passing the value, we pass a dictionary entry + model = Sequential() + model.add(Dense(params['first_neuron'], + input_dim=x_train.shape[1], + activation='relu')) + + # same here, just passing a dictionary entry + model.add(Dropout(params['dropout'])) + + # again, instead of the activation name, we have a dictionary entry + model.add(Dense(1, activation=params['last_activation'])) + + # here are using a learning rate boundary + model.compile(optimizer=params['optimizer'], + loss=params['losses'], + metrics=['acc']) + + # here we are also using the early_stopper function for a callback + out = model.fit(x_train, y_train, + batch_size=params['batch_size'], + epochs=2, + verbose=0, + validation_data=[x_val, y_val]) + + return out, model + +def iris(x_train, y_train, x_val, y_val, params): + + from 
keras.models import Sequential + from keras.layers import Dropout, Dense + from talos.model import lr_normalizer, early_stopper, hidden_layers + + # note how instead of passing the value, we pass a dictionary entry model = Sequential() model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1], activation='relu')) + # same here, just passing a dictionary entry model.add(Dropout(params['dropout'])) - hidden_layers(model, params, 1) + # with this call we can create any number of hidden layers + hidden_layers(model, params, y_train.shape[1]) - model.add(Dense(1, activation=params['last_activation'])) + # again, instead of the activation name, we have a dictionary entry + model.add(Dense(y_train.shape[1], + activation=params['last_activation'])) + # here are using a learning rate boundary model.compile(optimizer=params['optimizer'] (lr=lr_normalizer(params['lr'], params['optimizer'])), loss=params['losses'], - metrics=['acc', - fmeasure_acc, - recall_acc, - precision_acc, - matthews_correlation_acc]) + metrics=['acc']) - results = model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=params['epochs'], - verbose=0, - validation_data=[x_val, y_val], - callbacks=[early_stopper(params['epochs'], - mode='moderate', - monitor='val_fmeasure')]) + # here we are also using the early_stopper function for a callback + out = model.fit(x_train, y_train, + batch_size=params['batch_size'], + epochs=params['epochs'], + verbose=0, + validation_data=[x_val, y_val], + callbacks=[early_stopper(params['epochs'], mode=[1, 1])]) - return results, model + return out, model diff --git a/talos/utils/string_cols_to_numeric.py b/talos/utils/string_cols_to_numeric.py index 766b14dc..bb8494da 100644 --- a/talos/utils/string_cols_to_numeric.py +++ b/talos/utils/string_cols_to_numeric.py @@ -24,10 +24,10 @@ def string_cols_to_numeric(data, destructive=False): if data[col].apply(isnumber).sum() == len(data): try: data[col] = data[col].astype(int) - except ValueError: + except: # intentionally silent try: data[col] = data[col].astype(float) - except ValueError: + except: # intentionally silent data[col] = data[col] else: data[col] = data[col] From 173a05553cd322f34c1e1dda8f64931f5c1f253f Mon Sep 17 00:00:00 2001 From: Johan Mollevik Date: Wed, 27 Feb 2019 16:52:25 +0100 Subject: [PATCH 06/26] Added stopping after wall clock time --- talos/scan/Scan.py | 2 ++ talos/scan/scan_run.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py index a78ef88a..ac2a94e6 100755 --- a/talos/scan/Scan.py +++ b/talos/scan/Scan.py @@ -124,6 +124,7 @@ def __init__(self, x, y, params, model, random_method='uniform_mersenne', seed=None, search_method='random', + max_iteration_start_time=None, reduction_method=None, reduction_interval=50, reduction_window=20, @@ -151,6 +152,7 @@ def __init__(self, x, y, params, model, self.shuffle = shuffle self.random_method = random_method self.search_method = search_method + self.max_iteration_start_time=max_iteration_start_time self.reduction_method = reduction_method self.reduction_interval = reduction_interval self.reduction_window = reduction_window diff --git a/talos/scan/scan_run.py b/talos/scan/scan_run.py index 74e897c0..765f19de 100644 --- a/talos/scan/scan_run.py +++ b/talos/scan/scan_run.py @@ -1,5 +1,7 @@ from tqdm import tqdm +from datetime import datetime + from ..utils.results import result_todf, peak_epochs_todf from .scan_round import scan_round from .scan_finish import scan_finish @@ -9,6 +11,9 @@ def scan_run(self): 
'''The high-level management of the scan procedures onwards from preparation. Manages round_run()''' + + if self.max_iteration_start_time != None: + stoptime=datetime.strptime(self.max_iteration_start_time,"%Y-%m-%d %H:%M") # main loop for the experiment # NOTE: the progress bar is also updated on line 73 @@ -17,6 +22,9 @@ def scan_run(self): while len(self.param_log) != 0: self = scan_round(self) self.pbar.update(1) + if self.max_iteration_start_time != None and datetime.now() > stoptime: + print("Time limit reached, experiment finished") + break self.pbar.close() # save the results From 1feb18d8d428226c11c2489b2a00fa25b326ec57 Mon Sep 17 00:00:00 2001 From: Johan Mollevik Date: Thu, 28 Feb 2019 08:36:10 +0100 Subject: [PATCH 07/26] allow direct setting of experiment_name --- talos/scan/Scan.py | 2 ++ talos/scan/scan_prepare.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py index a78ef88a..eb47faeb 100755 --- a/talos/scan/Scan.py +++ b/talos/scan/Scan.py @@ -115,6 +115,7 @@ class Scan: def __init__(self, x, y, params, model, dataset_name=None, experiment_no=None, + experiment_name=None, x_val=None, y_val=None, val_split=.3, @@ -145,6 +146,7 @@ def __init__(self, x, y, params, model, self.model = model self.dataset_name = dataset_name self.experiment_no = experiment_no + self.experiment_name = experiment_name self.x_val = x_val self.y_val = y_val self.val_split = val_split diff --git a/talos/scan/scan_prepare.py b/talos/scan/scan_prepare.py index 61a7c265..ef77e841 100644 --- a/talos/scan/scan_prepare.py +++ b/talos/scan/scan_prepare.py @@ -24,7 +24,8 @@ def scan_prepare(self): if self.experiment_no is None: self.experiment_no = '' - self.experiment_name = self.dataset_name + '_' + self.experiment_no + if self.experiment_name is None: + self.experiment_name = self.dataset_name + '_' + self.experiment_no # create the round times list self.round_times = [] From dad2843a064f7ecaf8741e8086ce016c3e390f82 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Thu, 28 Feb 2019 14:23:20 +0200 Subject: [PATCH 08/26] added docstring to Deploy() --- talos/commands/deploy.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/talos/commands/deploy.py b/talos/commands/deploy.py index 7cd2e217..9e151acd 100644 --- a/talos/commands/deploy.py +++ b/talos/commands/deploy.py @@ -5,12 +5,6 @@ from ..utils.best_model import best_model, activate_model -# NOTE: this has some overlap with code in evaluate.py -# and needs to be cleaned up. -# TODO: needs to also deploy the hyperparameter configuration -# and some kind of summary of the experiment and then finally -# pack everything into a zip file - class Deploy: @@ -18,6 +12,27 @@ class Deploy: def __init__(self, scan_object, model_name, metric='val_acc', asc=False): + + '''Deploy a model to be used later or in a different system. + + NOTE: for a metric that is to be minimized, set asc=True or otherwise + you will end up with the model that has the highest loss. + + Deploy() takes in the object from Scan() and creates a package locally + that can be later activated with Restore(). + + scan_object : object + The object that is returned from Scan() upon completion. + model_name : str + Name for the .zip file to be created. + metric : str + The metric to be used for picking the best model. + asc: bool + Make this True for metrics that are to be minimized (e.g. loss) , and + False when the metric is to be maximized (e.g. 
acc) + + ''' + self.scan_object = scan_object os.mkdir(model_name) self.path = model_name + '/' + model_name From b64bed68f9b3ea2ab084270e1d7a7dcbb433b3a5 Mon Sep 17 00:00:00 2001 From: Johan Mollevik Date: Fri, 1 Mar 2019 08:50:43 +0100 Subject: [PATCH 09/26] removed use of np.prod to avoid using limited size integers solving #244 --- talos/parameters/ParamGrid.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py index 91e28daa..79619a51 100644 --- a/talos/parameters/ParamGrid.py +++ b/talos/parameters/ParamGrid.py @@ -24,7 +24,9 @@ def __init__(self, main_self): ls = [list(self._p[key]) for key in self._p.keys()] # get the number of total dimensions / permutations - virtual_grid_size = np.prod([len(l) for l in ls]) + virtual_grid_size=1 + for l in ls: + virtual_grid_size*=len(l) final_grid_size = virtual_grid_size # calculate the size of the downsample From 2a05cf473ffd4732e4c0962ad5d3a5905a77a843 Mon Sep 17 00:00:00 2001 From: Johan Mollevik Date: Wed, 27 Feb 2019 16:13:13 +0100 Subject: [PATCH 10/26] moved creation of round param dict into a function --- talos/parameters/round_params.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/talos/parameters/round_params.py b/talos/parameters/round_params.py index 79bca9d7..bae794aa 100644 --- a/talos/parameters/round_params.py +++ b/talos/parameters/round_params.py @@ -1,5 +1,13 @@ from numpy import random +def create_params_dict(self,_choice): + _round_params_dict = {} + x = 0 + for key in self.param_reference.keys(): + _round_params_dict[key] = self.param_grid[_choice][x] + x += 1 + + return _round_params_dict def round_params(self): @@ -20,10 +28,5 @@ def round_params(self): self.param_log.remove(_choice) # create a dictionary for the current round - _round_params_dict = {} - x = 0 - for key in self.param_reference.keys(): - _round_params_dict[key] = self.param_grid[_choice][x] - x += 1 + return create_params_dict(self,_choice) - return _round_params_dict From 035bbb7d106da017ff43533bacbc1fc5b864bc06 Mon Sep 17 00:00:00 2001 From: Johan Mollevik Date: Wed, 27 Feb 2019 16:15:38 +0100 Subject: [PATCH 11/26] moved creation of param_references to ParamGrid --- talos/parameters/ParamGrid.py | 6 ++++++ talos/scan/scan_prepare.py | 6 +----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py index 79619a51..11e04a6d 100644 --- a/talos/parameters/ParamGrid.py +++ b/talos/parameters/ParamGrid.py @@ -17,6 +17,12 @@ def __init__(self, main_self): self.main_self = main_self + # creates a reference dictionary for column number to label + self.param_reference = {} + for i, col in enumerate(self.main_self.params.keys()): + self.param_reference[col] = i + + # convert the input to useful format self._p = self._param_input_conversion() diff --git a/talos/scan/scan_prepare.py b/talos/scan/scan_prepare.py index ef77e841..2725cb5e 100644 --- a/talos/scan/scan_prepare.py +++ b/talos/scan/scan_prepare.py @@ -43,13 +43,9 @@ def scan_prepare(self): self.paramgrid_object = ParamGrid(self) self.param_log = self.paramgrid_object.param_log self.param_grid = self.paramgrid_object.param_grid + self.param_reference = self.paramgrid_object.param_reference del self.paramgrid_object - # creates a reference dictionary for column number to label - self.param_reference = {} - for i, col in enumerate(self.params.keys()): - self.param_reference[col] = i - self.round_counter = 0 self.peak_epochs 
= []
     self.epoch_entropy = []

From 46084cc6178108d1cabeb56d7fb10642d7d2f6b8 Mon Sep 17 00:00:00 2001
From: Johan Mollevik
Date: Wed, 27 Feb 2019 16:17:08 +0100
Subject: [PATCH 12/26] moved creation of param_grid into a function

---
 talos/parameters/ParamGrid.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py
index 11e04a6d..da678af4 100644
--- a/talos/parameters/ParamGrid.py
+++ b/talos/parameters/ParamGrid.py
@@ -43,14 +43,7 @@ def __init__(self, main_self):
         if self.main_self.round_limit is not None:
             final_grid_size = min(final_grid_size, self.main_self.round_limit)

-        # select premutations according to downsample
-        if final_grid_size < virtual_grid_size:
-            out = sample_reducer(self, final_grid_size, virtual_grid_size)
-        else:
-            out = range(0, final_grid_size)
-
-        # build the parameter permutation grid
-        self.param_grid = self._create_param_permutations(ls, out)
+        self.param_grid=self._create_param_grid(ls,final_grid_size,virtual_grid_size)

         # initialize with random shuffle if needed
@@ -62,6 +55,17 @@ def __init__(self, main_self):
         # add the log index to param grid
         self.param_grid = np.column_stack((self.param_grid, self.param_log))

+    def _create_param_grid(self,ls,final_grid_size,virtual_grid_size):
+        # select premutations according to downsample
+        if final_grid_size < virtual_grid_size:
+            out = sample_reducer(self, final_grid_size, virtual_grid_size)
+        else:
+            out = range(0, final_grid_size)
+
+        # build the parameter permutation grid
+        param_grid = self._create_param_permutations(ls, out)
+        return param_grid
+
     def _create_param_permutations(self, ls, permutation_index):

From 0c2d5da7a58459b3ac5344d0645363ce38e570bc Mon Sep 17 00:00:00 2001
From: Johan Mollevik
Date: Wed, 27 Feb 2019 16:17:55 +0100
Subject: [PATCH 13/26] Added premutation_filter functionality

---
 talos/parameters/ParamGrid.py | 16 +++++++++++++++-
 talos/scan/Scan.py            |  2 ++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py
index da678af4..dd4ae8fb 100644
--- a/talos/parameters/ParamGrid.py
+++ b/talos/parameters/ParamGrid.py
@@ -1,7 +1,7 @@
 import numpy as np

 from ..reducers.sample_reducer import sample_reducer
-
+from .round_params import create_params_dict

 class ParamGrid:

@@ -45,6 +45,20 @@ def __init__(self, main_self):

         self.param_grid=self._create_param_grid(ls,final_grid_size,virtual_grid_size)

+        fn=lambda i:self.main_self.premutation_filter(create_params_dict(self,i))
+        if self.main_self.premutation_filter != None:
+            grid_indices=list(filter(fn,range(len(self.param_grid))))
+            self.param_grid=self.param_grid[grid_indices]
+            final_expanded_grid_size=final_grid_size
+            while len(self.param_grid)<final_grid_size and final_expanded_grid_size<virtual_grid_size:
+                final_expanded_grid_size*=2
+                if final_expanded_grid_size>virtual_grid_size:
+                    final_expanded_grid_size=virtual_grid_size
+                self.param_grid=self._create_param_grid(ls,final_expanded_grid_size,virtual_grid_size)
+                grid_indices=list(filter(fn,range(len(self.param_grid))))
+                self.param_grid=self.param_grid[grid_indices]
+            self.param_grid=self.param_grid[:final_grid_size]
+
         # initialize with random shuffle if needed
         if self.main_self.shuffle:
             np.random.shuffle(self.param_grid)
diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py
index a827b869..310f9ab5 100755
--- a/talos/scan/Scan.py
+++ b/talos/scan/Scan.py
@@ -126,6 +126,7 @@ def __init__(self, x, y, params, model,
                  seed=None,
                  search_method='random',
                  max_iteration_start_time=None,
+                 premutation_filter=None,
                  reduction_method=None,
                  reduction_interval=50,
                  reduction_window=20,
@@ -155,6 +156,7 @@ def __init__(self, x, y, params, model,
         self.shuffle = shuffle
         self.random_method = random_method
         self.search_method = search_method
         self.max_iteration_start_time=max_iteration_start_time
+        self.premutation_filter=premutation_filter
         self.reduction_method = reduction_method
         self.reduction_interval = reduction_interval
         self.reduction_window = reduction_window
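A minimal usage sketch of the filter added in the patch above; x, y, the params
dict p and iris_model are placeholders, and at this point in the series the
argument is still spelled premutation_filter:

    import talos as ta

    # e.g. first_neuron=4 with hidden_layers=2 gives 4 * 2 = 8 < 150 and is kept,
    # while first_neuron=64 with hidden_layers=5 gives 320 and is filtered out
    ta.Scan(x, y, params=p, model=iris_model,
            premutation_filter=lambda p: p['first_neuron'] * p['hidden_layers'] < 150)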
From c51c1e90081d0c4e703475976e4430a86ce0f1a0 Mon Sep 17 00:00:00 2001
From: Johan Mollevik
Date: Fri, 1 Mar 2019 10:32:21 +0100
Subject: [PATCH 14/26] added test for premutation_filter

---
 test/core_tests/test_scan.py | 7 +++++++
 test_script.py               | 1 +
 2 files changed, 8 insertions(+)

diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py
index 08d705d2..580aac00 100644
--- a/test/core_tests/test_scan.py
+++ b/test/core_tests/test_scan.py
@@ -77,6 +77,13 @@ def test_scan_iris_2(self):
              experiment_no='000', model=ta.templates.models.iris,
              last_epoch_value=True)

+    def test_scan_iris_3(self):
+        print("Running Iris dataset test 2...")
+        Scan(self.x, self.y, params=p3, dataset_name='testing',
+             experiment_no='000', model=ta.templates.models.iris,
+             premutation_filter=lambda p: p['first_neuron']*p['hidden_layers']<150,
+             last_epoch_value=True)
+
     def test_scan_iris_explicit_validation_set(self):
         print("Running explicit validation dataset test with metric reduction")
         Scan(self.x_train, self.y_train, params=p2,
diff --git a/test_script.py b/test_script.py
index cc176eab..d7860408 100644
--- a/test_script.py
+++ b/test_script.py
@@ -52,5 +52,6 @@
     TestIris().test_scan_iris_explicit_validation_set_force_fail()
     TestIris().test_scan_iris_1()
     TestIris().test_scan_iris_2()
+    TestIris().test_scan_iris_3()
     TestReporting()
     TestLoadDatasets()
From c99552c12f9c750a29967036d83ec25c255b6f5b Mon Sep 17 00:00:00 2001
From: Johan Mollevik
Date: Fri, 1 Mar 2019 10:47:54 +0100
Subject: [PATCH 15/26] Fixed some PEP8 errors

---
 talos/parameters/ParamGrid.py    | 39 ++++++++++++++++----------------
 talos/parameters/round_params.py |  7 +++---
 talos/scan/Scan.py               |  4 ++--
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py
index dd4ae8fb..f53830f3 100644
--- a/talos/parameters/ParamGrid.py
+++ b/talos/parameters/ParamGrid.py
@@ -3,6 +3,7 @@
 from ..reducers.sample_reducer import sample_reducer
 from .round_params import create_params_dict

+
 class ParamGrid:

     '''Suite for handling parameters internally within Talos
@@ -22,7 +23,6 @@ def __init__(self, main_self):
         for i, col in enumerate(self.main_self.params.keys()):
             self.param_reference[col] = i

-
         # convert the input to useful format
         self._p = self._param_input_conversion()

@@ -30,9 +30,9 @@ def __init__(self, main_self):
         ls = [list(self._p[key]) for key in self._p.keys()]

         # get the number of total dimensions / permutations
-        virtual_grid_size=1
+        virtual_grid_size = 1
         for l in ls:
-            virtual_grid_size*=len(l)
+            virtual_grid_size *= len(l)
         final_grid_size = virtual_grid_size

         # calculate the size of the downsample
@@ -43,21 +43,22 @@ def __init__(self, main_self):
         if self.main_self.round_limit is not None:
             final_grid_size = min(final_grid_size, self.main_self.round_limit)

-        self.param_grid=self._create_param_grid(ls,final_grid_size,virtual_grid_size)
-
-        fn=lambda i:self.main_self.premutation_filter(create_params_dict(self,i))
-        if self.main_self.premutation_filter != None:
-            grid_indices=list(filter(fn,range(len(self.param_grid))))
-            self.param_grid=self.param_grid[grid_indices]
-            final_expanded_grid_size=final_grid_size
-            while len(self.param_grid)<final_grid_size and final_expanded_grid_size<virtual_grid_size:
-                final_expanded_grid_size*=2
-                if final_expanded_grid_size>virtual_grid_size:
-                    final_expanded_grid_size=virtual_grid_size
-                self.param_grid=self._create_param_grid(ls,final_expanded_grid_size,virtual_grid_size)
-                grid_indices=list(filter(fn,range(len(self.param_grid))))
-                self.param_grid=self.param_grid[grid_indices]
-            self.param_grid=self.param_grid[:final_grid_size]
+        self.param_grid = self._create_param_grid(ls, final_grid_size, virtual_grid_size)
+
+        def fn(i):
+            return self.main_self.premutation_filter(create_params_dict(self, i))
+        if self.main_self.premutation_filter is not None:
+            grid_indices = list(filter(fn, range(len(self.param_grid))))
+            self.param_grid = self.param_grid[grid_indices]
+            final_expanded_grid_size = final_grid_size
+            while len(self.param_grid) < final_grid_size and final_expanded_grid_size < virtual_grid_size:
+                final_expanded_grid_size *= 2
+                if final_expanded_grid_size > virtual_grid_size:
+                    final_expanded_grid_size = virtual_grid_size
+                self.param_grid = self._create_param_grid(ls, final_expanded_grid_size, virtual_grid_size)
+                grid_indices=list(filter(fn, range(len(self.param_grid))))
+                self.param_grid = self.param_grid[grid_indices]
+            self.param_grid = self.param_grid[:final_grid_size]

         # initialize with random shuffle if needed
         if self.main_self.shuffle:
@@ -69,7 +70,7 @@ def __init__(self, main_self):
         # add the log index to param grid
         self.param_grid = np.column_stack((self.param_grid, self.param_log))

-    def _create_param_grid(self,ls,final_grid_size,virtual_grid_size):
+    def _create_param_grid(self, ls, final_grid_size, virtual_grid_size):
         # select premutations according to downsample
         if final_grid_size < virtual_grid_size:
             out = sample_reducer(self, final_grid_size, virtual_grid_size)
diff --git a/talos/parameters/round_params.py b/talos/parameters/round_params.py
index bae794aa..52d860fa 100644
--- a/talos/parameters/round_params.py
+++ b/talos/parameters/round_params.py
@@ -1,6 +1,7 @@
 from numpy import random

-def create_params_dict(self,_choice):
+
+def create_params_dict(self, _choice):
     _round_params_dict = {}
     x = 0
     for key in self.param_reference.keys():
@@ -9,6 +10,7 @@ def create_params_dict(self,_choice):

     return _round_params_dict

+
 def round_params(self):

     '''Picks the paramaters for a round based on the available
@@ -28,5 +30,4 @@ def round_params(self):
         self.param_log.remove(_choice)

     # create a dictionary for the current round
-    return create_params_dict(self,_choice)
-
+    return create_params_dict(self, _choice)
diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py
index 310f9ab5..d56ab9d6 100755
--- a/talos/scan/Scan.py
+++ b/talos/scan/Scan.py
@@ -155,8 +155,8 @@ def __init__(self, x, y, params, model,
         self.shuffle = shuffle
         self.random_method = random_method
         self.search_method = search_method
-        self.max_iteration_start_time=max_iteration_start_time
-        self.premutation_filter=premutation_filter
+        self.max_iteration_start_time = max_iteration_start_time
+        self.premutation_filter = premutation_filter
         self.reduction_method = reduction_method
         self.reduction_interval = reduction_interval
         self.reduction_window = reduction_window

From fc709df0a5e767b96347a420d6343d71087a88 Mon Sep 17 00:00:00 2001
From: Johan Mollevik
Date: Fri, 1 Mar 2019 10:51:01 +0100
Subject: [PATCH 16/26] simplified test case to speed it up

---
 test/core_tests/test_scan.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py
index
580aac00..5f05fb56 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -58,6 +58,20 @@ 'activation': [relu], 'last_activation': [sigmoid]} +p4 = {'lr': [1], + 'first_neuron': [8, 32, 64], + 'hidden_layers': [2, 4, 5], + 'batch_size': [30], + 'epochs': [3], + 'dropout': [0], + 'weight_regulizer': [None], + 'shapes': ['funnel'], + 'emb_output_dims': [None], + 'optimizer': [Nadam], + 'losses': [softmax], + 'activation': [relu], + 'last_activation': [sigmoid]} + class TestIris: From 479ab70ba3e78e301bcbd738f68be88daca9733d Mon Sep 17 00:00:00 2001 From: Johan Mollevik Date: Fri, 1 Mar 2019 12:36:43 +0100 Subject: [PATCH 17/26] fixed test --- test/core_tests/test_scan.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index 5f05fb56..aa9afa89 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -59,8 +59,8 @@ 'last_activation': [sigmoid]} p4 = {'lr': [1], - 'first_neuron': [8, 32, 64], - 'hidden_layers': [2, 4, 5], + 'first_neuron': [8, 64], + 'hidden_layers': [2, 5], 'batch_size': [30], 'epochs': [3], 'dropout': [0], @@ -92,8 +92,8 @@ def test_scan_iris_2(self): last_epoch_value=True) def test_scan_iris_3(self): - print("Running Iris dataset test 2...") - Scan(self.x, self.y, params=p3, dataset_name='testing', + print("Running Iris dataset test 3...") + Scan(self.x, self.y, params=p4, dataset_name='testing', experiment_no='000', model=ta.templates.models.iris, premutation_filter=lambda p: p['first_neuron']*p['hidden_layers']<150, last_epoch_value=True) From 4704539e860861b9a0873202a40c0b68a7183028 Mon Sep 17 00:00:00 2001 From: Johan Mollevik Date: Fri, 1 Mar 2019 12:47:07 +0100 Subject: [PATCH 18/26] fix for test --- test/core_tests/test_scan.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index aa9afa89..1e28d9be 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -68,7 +68,7 @@ 'shapes': ['funnel'], 'emb_output_dims': [None], 'optimizer': [Nadam], - 'losses': [softmax], + 'losses': [categorical_crossentropy], 'activation': [relu], 'last_activation': [sigmoid]} @@ -96,6 +96,7 @@ def test_scan_iris_3(self): Scan(self.x, self.y, params=p4, dataset_name='testing', experiment_no='000', model=ta.templates.models.iris, premutation_filter=lambda p: p['first_neuron']*p['hidden_layers']<150, + round_limit=1, last_epoch_value=True) def test_scan_iris_explicit_validation_set(self): From abb078ed27dbf9d30bada1b4f52db2954751d334 Mon Sep 17 00:00:00 2001 From: Johan Mollevik Date: Fri, 1 Mar 2019 12:55:37 +0100 Subject: [PATCH 19/26] fix for error in reporting for test with only one premutation --- test/core_tests/test_scan.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index 1e28d9be..959a5b70 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -59,10 +59,10 @@ 'last_activation': [sigmoid]} p4 = {'lr': [1], - 'first_neuron': [8, 64], - 'hidden_layers': [2, 5], - 'batch_size': [30], - 'epochs': [3], + 'first_neuron': [8, 24, 64], + 'hidden_layers': [2, 5, 10], + 'batch_size': [15, 30], + 'epochs': [1], 'dropout': [0], 'weight_regulizer': [None], 'shapes': ['funnel'], @@ -96,7 +96,7 @@ def test_scan_iris_3(self): Scan(self.x, self.y, params=p4, dataset_name='testing', experiment_no='000', model=ta.templates.models.iris, 
premutation_filter=lambda p: p['first_neuron']*p['hidden_layers']<150,
-             round_limit=1,
+             round_limit=4,
              last_epoch_value=True)

     def test_scan_iris_explicit_validation_set(self):

From 9787ff150e185ff5a51a25c69978d2c50621ce79 Mon Sep 17 00:00:00 2001
From: Mikko Kotila
Date: Sat, 2 Mar 2019 11:27:53 +0200
Subject: [PATCH 20/26] testing restructure and permutation_filter

- moved permutation_filter to its own function. The question
  here is whether it would be meaningful to support a boolean
  statement directly, or whether there is benefit from having
  the lambda
- premutation >> permutation
- fixed the case where unrecognized random_method failed
  silently; it now warns the user and falls back to
  'uniform_mersenne'
- added missing docstrings to Scan()
- the tests are now organized in a meaningful way:

"""
The tests below have to serve several purpose:

- test possible input methods to params dict
- test binary, multi class, multi label and continuous problems
- test all Scan arguments

Each problem type is presented as a Class, and contains three
experiments using single, list, or range inputs. There is an
effort to test as many scenarios as possible here, so be
inventive / experiment! Doing well with this part of the testing,
there is a healthy base for a more serious approach to ensuring
procedural integrity.

"""
---
 talos/parameters/ParamGrid.py          |  23 +-
 talos/parameters/permutation_filter.py |  32 ++
 talos/reducers/sample_reducer.py       |   3 +
 talos/scan/Scan.py                     |  80 ++---
 test/core_tests/test_auto_scan.py      |   3 +-
 test/core_tests/test_scan.py           | 386 ++++++++++++++++---------
 test_script.py                         |  38 ++-
 7 files changed, 358 insertions(+), 207 deletions(-)
 create mode 100644 talos/parameters/permutation_filter.py

diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py
index f53830f3..e8c5f748 100644
--- a/talos/parameters/ParamGrid.py
+++ b/talos/parameters/ParamGrid.py
@@ -2,6 +2,7 @@

 from ..reducers.sample_reducer import sample_reducer
 from .round_params import create_params_dict
+from .permutation_filter import permutation_filter


 class ParamGrid:

@@ -43,22 +44,12 @@ def __init__(self, main_self):
         if self.main_self.round_limit is not None:
             final_grid_size = min(final_grid_size, self.main_self.round_limit)

+        # create the params grid
         self.param_grid = self._create_param_grid(ls, final_grid_size, virtual_grid_size)

-        def fn(i):
-            return self.main_self.premutation_filter(create_params_dict(self, i))
-        if self.main_self.premutation_filter is not None:
-            grid_indices = list(filter(fn, range(len(self.param_grid))))
-            self.param_grid = self.param_grid[grid_indices]
-            final_expanded_grid_size = final_grid_size
-            while len(self.param_grid) < final_grid_size and final_expanded_grid_size < virtual_grid_size:
-                final_expanded_grid_size *= 2
-                if final_expanded_grid_size > virtual_grid_size:
-                    final_expanded_grid_size = virtual_grid_size
-                self.param_grid = self._create_param_grid(ls, final_expanded_grid_size, virtual_grid_size)
-                grid_indices=list(filter(fn, range(len(self.param_grid))))
-                self.param_grid = self.param_grid[grid_indices]
-            self.param_grid = self.param_grid[:final_grid_size]
+        # handle the case where permutation filter is provided
+        if self.main_self.permutation_filter is not None:
+            self = permutation_filter(self, ls, final_grid_size, virtual_grid_size)

         # initialize with random shuffle if needed
         if self.main_self.shuffle:
@@ -71,7 +62,8 @@ def fn(i):
         self.param_grid = np.column_stack((self.param_grid, self.param_log))

     def _create_param_grid(self, ls, final_grid_size, virtual_grid_size):
-
+
+        # select
premutations according to downsample + + # select permutations according to downsample if final_grid_size < virtual_grid_size: out = sample_reducer(self, final_grid_size, virtual_grid_size) else: @@ -79,6 +71,7 @@ def _create_param_grid(self, ls, final_grid_size, virtual_grid_size): # build the parameter permutation grid param_grid = self._create_param_permutations(ls, out) + return param_grid def _create_param_permutations(self, ls, permutation_index): diff --git a/talos/parameters/permutation_filter.py b/talos/parameters/permutation_filter.py new file mode 100644 index 00000000..44dbc8c8 --- /dev/null +++ b/talos/parameters/permutation_filter.py @@ -0,0 +1,32 @@ +def permutation_filter(self, ls, final_grid_size, virtual_grid_size): + + '''Handles the filtering for ta.Scan(... permutation_filter= ...)''' + + from .round_params import create_params_dict + + # handle the filtering with the current params grid + + def fn(i): + + params_dict = create_params_dict(self, i) + fn = self.main_self.permutation_filter(params_dict) + + return fn + + grid_indices = list(filter(fn, range(len(self.param_grid)))) + self.param_grid = self.param_grid[grid_indices] + final_expanded_grid_size = final_grid_size + + while len(self.param_grid) < final_grid_size and final_expanded_grid_size < virtual_grid_size: + final_expanded_grid_size *= 2 + + if final_expanded_grid_size > virtual_grid_size: + final_expanded_grid_size = virtual_grid_size + + self.param_grid = self._create_param_grid(ls, final_expanded_grid_size, virtual_grid_size) + grid_indices=list(filter(fn, range(len(self.param_grid)))) + self.param_grid = self.param_grid[grid_indices] + + self.param_grid = self.param_grid[:final_grid_size] + + return self \ No newline at end of file diff --git a/talos/reducers/sample_reducer.py b/talos/reducers/sample_reducer.py index 1a36ed6c..824dbb35 100644 --- a/talos/reducers/sample_reducer.py +++ b/talos/reducers/sample_reducer.py @@ -57,5 +57,8 @@ def sample_reducer(self, length, max_value): out = r.uniform_crypto() elif random_method == 'ambience': out = r.ambience() + else: + print('check random_method, no eligble method found. Using uniform mersenne.') + out = r.uniform_mersenne() return out diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py index d56ab9d6..bb1e0358 100755 --- a/talos/scan/Scan.py +++ b/talos/scan/Scan.py @@ -5,43 +5,42 @@ class Scan: - """Suite of operations for training and evaluating Keras neural networks. + """Hyperparamater scanning and optimization - Inputs train/dev data and a set of parameters as a dictionary. The name and - experiment number must also be chosen since they define the output - filenames. The model must also be specified of the form + USE: ta.Scan(x=x, y=y, params=params_dict, model=model) - my_model(x_train, y_train, x_val, y_val, params), + Takes in a Keras model, and a dictionary with the parameter + boundaries for the experiment. - and the dictionary - - d = { - 'fcc_layer_1_N': [50, 100, 200], - 'fcc_layer_1_act': ['relu', 'tanh'], - 'fcc_layer_1_dropout': (0, 0.1, 5) # 5 points between 0 and 0.1 + p = { + 'epochs' : [50, 100, 200], + 'activation' : ['relu'], + 'dropout': (0, 0.1, 5) } + + Accepted input formats are [1] single value in a list, [0.1, 0.2] + multiple values in a list, and (0, 0.1, 5) a range of 5 values + from 0 to 0.1. + + Here is an example of the input model: + + def model(): + + # any Keras model - The dictionary is parsed for every run and only one entry per parameter - is fed into the neural network at a time. 
diff --git a/talos/reducers/sample_reducer.py b/talos/reducers/sample_reducer.py
index 1a36ed6c..824dbb35 100644
--- a/talos/reducers/sample_reducer.py
+++ b/talos/reducers/sample_reducer.py
@@ -57,5 +57,8 @@ def sample_reducer(self, length, max_value):
         out = r.uniform_crypto()
     elif random_method == 'ambience':
         out = r.ambience()
+    else:
+        print('check random_method, no eligible method found. Using uniform mersenne.')
+        out = r.uniform_mersenne()

     return out
diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py
index d56ab9d6..bb1e0358 100755
--- a/talos/scan/Scan.py
+++ b/talos/scan/Scan.py
@@ -5,43 +5,42 @@

 class Scan:

-    """Suite of operations for training and evaluating Keras neural networks.
+    """Hyperparameter scanning and optimization

-    Inputs train/dev data and a set of parameters as a dictionary. The name and
-    experiment number must also be chosen since they define the output
-    filenames. The model must also be specified of the form
+    USE: ta.Scan(x=x, y=y, params=params_dict, model=model)

-        my_model(x_train, y_train, x_val, y_val, params),
+    Takes in a Keras model, and a dictionary with the parameter
+    boundaries for the experiment.

-    and the dictionary
+    p = {
+        'epochs' : [50, 100, 200],
+        'activation' : ['relu'],
+        'dropout': (0, 0.1, 5)
+    }

-        d = {
-            'fcc_layer_1_N': [50, 100, 200],
-            'fcc_layer_1_act': ['relu', 'tanh'],
-            'fcc_layer_1_dropout': (0, 0.1, 5)  # 5 points between 0 and 0.1
+    Accepted input formats are [1] single value in a list, [0.1, 0.2]
+    multiple values in a list, and (0, 0.1, 5) a range of 5 values
+    from 0 to 0.1.
+
+    Here is an example of the input model:
+
+    def model():
+
+        # any Keras model

-    The dictionary is parsed for every run and only one entry per parameter
-    is fed into the neural network at a time.
+        return out, model

-    Important note: the user has two options when specifying input data.

-    Option 1:
-    Specify x, y and val_split. The training and validation data mixture
-    (x, y) will be randomly split into the training and validation datasets
-    as per the split specified in val_split.
+    You must replace the parameters in the model with references to
+    the dictionary, for example:

-    Option 2:
-    Specify x, y and x_val, y_val. This would allow the user to specify
-    their own validation datasets. Keras by default shuffles data during
-    training, so the user need only be sure that the split specified is
-    correct. This allows for not only reproducibility, but randomizing the
-    data on the user's own terms. This is critical if the user wishes to
-    augment their training data without augmenting their validation data
-    (which is the only acceptable practice!).
+        model.fit(epochs=params['epochs'])

+    To learn more, start from the examples and documentation
+    available here: https://github.com/autonomio/talos


-    Parameters
+    PARAMETERS
     ----------
     x : ndarray
         1d or 2d array consisting of the training data. `x` should have the
@@ -55,15 +54,18 @@ class Scan:
     params : python dictionary
         Lists all permutations of hyperparameters, a subset of which will be
         selected at random for training and evaluation.
+    model : keras model
+        Any Keras model with relevant declarations like params['first_neuron']
     dataset_name : str
         References the name of the experiment. The dataset_name and
         experiment_no will be concatenated to produce the file name for the
         results saved in the local directory.
     experiment_no : str
         Indexes the user's choice of experiment number.
-    model : keras_model
-        A Keras style model which compiles and fits the data, and returns
-        the history and compiled model.
+    x_val : ndarray
+        User specified cross-validation data. (Default is None).
+    y_val : ndarray
+        User specified cross-validation labels. (Default is None).
     val_split : float, optional
         The proportion of the input `x` which is set aside as the validation
         data. (Default is 0.3).
@@ -73,12 +75,20 @@ class Scan:
     random_method : uniform, stratified, lhs, lhs_sudoku
         Determines the way in which the grid_downsample is applied. The
         default setting is 'uniform'.
+    seed : int
+        Sets numpy random seed.
     search_method : {None, 'random', 'linear', 'reverse'}
         Determines the random sampling of the dictionary. `random` picks one
         hyperparameter point at random and removes it from the list, then
         samples again. `linear` starts from the start of the grid and moves
         forward, and `reverse` starts at the end of the grid and moves
         backwards.
+    max_iteration_start_time : None or str
+        Allows setting a time when experiment will be completed.
+    permutation_filter : lambda function
+        Use it to filter permutations based on previous knowledge.
+        USE: permutation_filter=lambda p: p['batch_size'] < 150
+        This example removes any permutation where batch_size is below 150
     reduction_method : {None, 'correlation'}
         Method for honing in on the optimal hyperparameter subspace.
         (Default is None).
@@ -94,13 +104,11 @@ class Scan:
         Limits the number of rounds (permutations) in the experiment.
     reduction_metric : {'val_acc'}
         Metric used to tune the reductions.
-    x_val : ndarray
-        User specified cross-validation data. (Default is None).
-    y_val : ndarray
-        User specified cross-validation labels. (Default is None).
     last_epoch_value : bool
         Set to True if the last epoch metric values are logged as opposed to
         the default which is peak epoch values for each round.
+ disable_progress_bar : bool + Disable TQDM live progress bar. print_params : bool Print params for each round on screen (useful when using TrainingLog callback for visualization) @@ -126,7 +134,7 @@ def __init__(self, x, y, params, model, seed=None, search_method='random', max_iteration_start_time=None, - premutation_filter=None, + permutation_filter=None, reduction_method=None, reduction_interval=50, reduction_window=20, @@ -156,7 +164,7 @@ def __init__(self, x, y, params, model, self.random_method = random_method self.search_method = search_method self.max_iteration_start_time = max_iteration_start_time - self.premutation_filter = premutation_filter + self.permutation_filter = permutation_filter self.reduction_method = reduction_method self.reduction_interval = reduction_interval self.reduction_window = reduction_window diff --git a/test/core_tests/test_auto_scan.py b/test/core_tests/test_auto_scan.py index 2be1c1f6..eef41b74 100644 --- a/test/core_tests/test_auto_scan.py +++ b/test/core_tests/test_auto_scan.py @@ -16,6 +16,7 @@ def test_auto_scan(): for key in p.keys(): p[key] = [p[key][0]] - ta.Scan(x, y, p, ta.KerasModel().model) + ta.Scan(x, y, p, ta.KerasModel().model, + permutation_filter=lambda p: p['batch_size'] < 150,) return "Finished testing auto Scan()" diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index 959a5b70..b8bdb1b9 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -2,13 +2,13 @@ from __future__ import print_function -from keras.losses import categorical_crossentropy, logcosh -from keras.losses import binary_crossentropy +from keras.losses import binary_crossentropy, sparse_categorical_crossentropy +from keras.losses import categorical_crossentropy, mean_squared_error from keras.optimizers import SGD, Adam, Adadelta, Adagrad from keras.optimizers import Adamax, RMSprop, Nadam -from keras.activations import softmax, relu, sigmoid +from keras.activations import relu, sigmoid -from sklearn.model_selection import train_test_split +from sklearn.model_selection import train_test_split as splt from talos.scan.Scan import Scan from talos.commands.reporting import Reporting @@ -16,154 +16,261 @@ import talos as ta -p1 = {'lr': [1], - 'first_neuron': [4], - 'hidden_layers': [2], - 'batch_size': [50], - 'epochs': [1], - 'dropout': [0], - 'shapes': ['brick', 'funnel', 'triangle', 0.2], - 'optimizer': [Adam], - 'losses': [categorical_crossentropy], - 'activation': [relu], - 'last_activation': [softmax], - 'weight_regulizer': [None], - 'emb_output_dims': [None]} - -p2 = {'lr': [1], - 'first_neuron': [4], - 'hidden_layers': [2], - 'batch_size': [50], - 'epochs': [1], - 'dropout': [0], - 'shapes': ['brick'], - 'optimizer': [Adam, Adagrad, Adamax, RMSprop, Adadelta, Nadam, SGD], - 'losses': [categorical_crossentropy], - 'activation': [relu], - 'last_activation': [softmax], - 'weight_regulizer': [None], - 'emb_output_dims': [None]} - -p3 = {'lr': (0.5, 5, 10), - 'first_neuron': [4, 8, 16, 32, 64], - 'hidden_layers': [2, 3, 4, 5], - 'batch_size': (2, 30, 10), - 'epochs': [3], - 'dropout': (0, 0.5, 5), - 'weight_regulizer': [None], - 'shapes': ['funnel'], - 'emb_output_dims': [None], - 'optimizer': [Nadam], - 'losses': [logcosh, binary_crossentropy], - 'activation': [relu], - 'last_activation': [sigmoid]} - -p4 = {'lr': [1], - 'first_neuron': [8, 24, 64], - 'hidden_layers': [2, 5, 10], - 'batch_size': [15, 30], - 'epochs': [1], - 'dropout': [0], - 'weight_regulizer': [None], - 'shapes': ['funnel'], - 'emb_output_dims': [None], - 
'optimizer': [Nadam], - 'losses': [categorical_crossentropy], - 'activation': [relu], - 'last_activation': [sigmoid]} - - -class TestIris: +# single values +def values_single_params(): + return {'lr': [1], + 'first_neuron': [4], + 'hidden_layers': [2], + 'batch_size': [100], + 'epochs': [2], + 'dropout': [0], + 'shapes': ['brick'], + 'optimizer': [Adam], + 'losses': [binary_crossentropy, + sparse_categorical_crossentropy, + categorical_crossentropy, + mean_squared_error], + 'activation': ['relu'], + 'last_activation': ['softmax']} + + +# lists of values +def values_list_params(): + return {'lr': [1, 2], + 'first_neuron': [4, 4], + 'hidden_layers': [2, 2], + 'batch_size': [100, 200], + 'epochs': [1, 2], + 'dropout': [0, 0.1], + 'shapes': ['brick', 'funnel', 'triangle', 0.2], + 'optimizer': [Adam, Adagrad, Adamax, RMSprop, Adadelta, Nadam, SGD], + 'losses': ['binary_crossentropy', + 'sparse_categorical_crossentropy', + 'categorical_crossentropy', + 'mean_squared_error'], + 'activation': ['relu', 'elu'], + 'last_activation': ['softmax']} + + +# range of values +def values_range_params(): + return {'lr': (0.5, 5, 10), + 'first_neuron': (4, 100, 5), + 'hidden_layers': (0, 5, 5), + 'batch_size': (200, 300, 10), + 'epochs': (1, 5, 4), + 'dropout': (0, 0.5, 5), + 'shapes': ['funnel'], + 'optimizer': [Nadam], + 'losses': [binary_crossentropy, + sparse_categorical_crossentropy, + categorical_crossentropy, + mean_squared_error], + 'activation': [relu], + 'last_activation': [sigmoid]} + + +""" +The tests below have to serve several purpose: + +- test possible input methods to params dict +- test binary, multi class, multi label and continuous problems +- test all Scan arguments + +Each problem type is presented as a Class, and contains three +experiments using single, list, or range inputs. There is an +effort to test as many scenarios as possible here, so be +inventive / experiment! Doing well with this part of the testing, +there is a healthy base for a more serious approach to ensuring +procedural integrity. + +""" + + +def get_params(task): + + """ + + Helper that allows the tests to feed from same + params dictionaries. 
+ + USE: values_single, values_list, values_range = get_appropriate_loss(0) + + 0 = binary + 1 = 1d multi class + 2 = 2d multi label + 3 = continuous / regression + + """ + + # first create the params dict + values_single = values_single_params() + values_list = values_list_params() + values_range = values_range_params() + + # then limit the losses according to prediction task + values_single['losses'] = [values_single_params()['losses'][task]] + values_list['losses'] = [values_list_params()['losses'][task]] + values_range['losses'] = [values_range_params()['losses'][task]] + + return values_single, values_list, values_range + + +class BinaryTest: def __init__(self): - self.x, self.y = ta.templates.datasets.iris() - self.x_train, self.x_dev, self.y_train, self.y_dev \ - = train_test_split(self.x, self.y, test_size=0.2) - - def test_scan_iris_1(self): - print("Running Iris dataset test 1...") - Scan(self.x, self.y, params=p1, dataset_name='testing', - experiment_no='000', model=ta.templates.models.iris) - - def test_scan_iris_2(self): - print("Running Iris dataset test 2...") - Scan(self.x, self.y, params=p2, dataset_name='testing', - experiment_no='000', model=ta.templates.models.iris, - last_epoch_value=True) - - def test_scan_iris_3(self): - print("Running Iris dataset test 3...") - Scan(self.x, self.y, params=p4, dataset_name='testing', - experiment_no='000', model=ta.templates.models.iris, - premutation_filter=lambda p: p['first_neuron']*p['hidden_layers']<150, - round_limit=4, - last_epoch_value=True) - - def test_scan_iris_explicit_validation_set(self): - print("Running explicit validation dataset test with metric reduction") - Scan(self.x_train, self.y_train, params=p2, - dataset_name='testing', - experiment_no='000', model=ta.templates.models.iris, - x_val=self.x_dev, y_val=self.y_dev) - - def test_scan_iris_explicit_validation_set_force_fail(self): - print("Running explicit validation dataset test with loss reduction") - try: - Scan(self.x_train, self.y_train, params=p2, - dataset_name='testing', - experiment_no='000', model=ta.templates.models.iris, - y_val=self.y_dev) - except RuntimeError: - pass - - -class TestCancer: - def __init__(self): + # read the params dictionary with the right loss + self.values_single, self.values_list, self.values_range = get_params(0) + + # prepare the data for the experiment self.x, self.y = ta.templates.datasets.cervical_cancer() + self.x = self.x[:300] + self.y = self.y[:300] self.model = ta.templates.models.cervical_cancer - def test_scan_cancer_metric_reduction(self): - print("Running Cervical Cancer dataset test...") - Scan(self.x, self.y, grid_downsample=0.00025, params=p3, - dataset_name='testing', experiment_no='a', - model=self.model, - random_method='latin_sudoku', - reduction_threshold=0.01, + # split validation data + self.x_train, self.x_val, self.y_train, self.y_val = splt(self.x, + self.y, + test_size=0.2) + + def values_single_test(self): + print("BinaryTest : Running values_single_test...") + + Scan(self.x, + self.y, + params=self.values_single, + model=ta.templates.models.cervical_cancer) + + def values_list_test(self): + print("BinaryTest : Running values_list_test...") + Scan(self.x_train, + self.y_train, + x_val=self.x_val, + y_val=self.y_val, + params=self.values_list, + round_limit=2, + dataset_name='BinaryTest', + experiment_no='000', + model=ta.templates.models.cervical_cancer, + random_method='crypto_uniform', + seed=2423, + search_method='linear', reduction_method='correlation', - reduction_interval=2) - - def 
test_scan_cancer_loss_reduction(self): - print("Running Cervical Cancer dataset test...") - Scan(self.x, self.y, grid_downsample=0.00025, params=p3, - dataset_name='testing', experiment_no='a', - model=self.model, - random_method='sobol', + reduction_interval=2, + reduction_window=2, + reduction_threshold=0.2, reduction_metric='val_loss', - reduction_threshold=0.01, + reduce_loss=True, + last_epoch_value=True, + clear_tf_session=False, + disable_progress_bar=True, + debug=True) + + # comprehensive + def values_range_test(self): + print("BinaryTest : Running values_range_test...") + Scan(self.x_train, + self.y_train, + params=self.values_range, + model=ta.templates.models.cervical_cancer, + grid_downsample=0.0001, + permutation_filter=lambda p: p['first_neuron'] * p['hidden_layers'] < 220, + random_method='sobol', reduction_method='correlation', - reduction_interval=2) + reduction_interval=2, + reduction_window=2, + reduction_threshold=0.2, + reduction_metric='val_acc', + reduce_loss=False, + debug=True) - def test_linear_method(self): - print("Testing linear method on Cancer dataset...") - Scan(self.x, self.y, params=p3, dataset_name='testing', - search_method='linear', grid_downsample=0.00025, - experiment_no='000', model=self.model, - random_method='quantum') - def test_reverse_method(self): - print("Testing reverse method on Cancer dataset...") - Scan(self.x, self.y, params=p3, dataset_name='testing', - search_method='reverse', grid_downsample=0.00025, - experiment_no='000', model=self.model) +class MultiLabelTest: + + def __init__(self): + + # read the params dictionary with the right loss + self.values_single, self.values_list, self.values_range = get_params(2) + + self.x, self.y = ta.templates.datasets.iris() + self.x_train, self.x_val, self.y_train, self.y_val = splt(self.x, + self.y, + test_size=0.2) + + def values_single_test(self): + print("MultiLabelTest : Running values_single_test...") + Scan(self.x, + self.y, + params=self.values_single, + model=ta.templates.models.iris) + + def values_list_test(self): + print("MultiLabelTest : Running values_list_test...") + Scan(self.x, + self.y, + x_val=self.x_val, + y_val=self.y_val, + params=self.values_list, + round_limit=2, + dataset_name='MultiLabelTest', + experiment_no='000', + model=ta.templates.models.iris, + random_method='crypto_uniform', + seed=2423, + search_method='linear', + max_iteration_start_time=None, + permutation_filter=lambda p: p['first_neuron'] * p['hidden_layers'] < 9, + reduction_method='correlation', + reduction_interval=2, + reduction_window=2, + reduction_threshold=0.2, + reduction_metric='val_loss', + reduce_loss=True, + last_epoch_value=True, + clear_tf_session=False, + disable_progress_bar=True, + debug=True) + + # comprehensive + def values_range_test(self): + print("MultiLabelTest : Running values_range_test...") + Scan(self.x, + self.y, + params=self.values_range, + model=ta.templates.models.iris, + grid_downsample=0.0001, + random_method='sobol', + reduction_method='correlation', + reduction_interval=2, + reduction_window=2, + reduction_threshold=0.2, + reduction_metric='val_acc', + reduce_loss=False, + debug=True) -class TestReporting: +class ReportingTest: def __init__(self): - print("Testing Reporting...") - r = Reporting('testing_000.csv') + print("ReportingTest : Running Binary test...") + + r = Reporting('BinaryTest_000.csv') + + x = r.data + x = r.correlate() + x = r.high() + x = r.low() + x = r.rounds() + x = r.rounds2high() + x = r.best_params() + x = r.plot_corr() + x = r.plot_hist() + x = 
r.plot_line() + + print("ReportingTest : Running MultiLabel test...") + r = Reporting('MultiLabelTest_000.csv') x = r.data x = r.correlate() @@ -179,10 +286,11 @@ def __init__(self): del x -class TestLoadDatasets: +class DatasetTest: def __init__(self): - print("Testing Load Datasets...") + + print("DatasetTest : Running tests...") x = ta.templates.datasets.icu_mortality() x = ta.templates.datasets.icu_mortality(100) x = ta.templates.datasets.titanic() @@ -190,4 +298,4 @@ def __init__(self): x = ta.templates.datasets.cervical_cancer() x = ta.templates.datasets.breast_cancer() x = ta.templates.params.iris() - x = ta.templates.params.breast_cancer() # noqa + x = ta.templates.params.breast_cancer() diff --git a/test_script.py b/test_script.py index d7860408..a88bdf04 100644 --- a/test_script.py +++ b/test_script.py @@ -3,8 +3,6 @@ import talos as ta -from test.core_tests.test_scan import TestIris, TestCancer -from test.core_tests.test_scan import TestReporting, TestLoadDatasets from test.core_tests.test_scan_object import test_scan_object from test.core_tests.test_reporting_object import test_reporting_object from test.core_tests.test_random_methods import test_random_methods @@ -20,15 +18,35 @@ '''NOTE: test/core_tests/test_scan.py needs to be edited as well!''' - # Scan + # testing different model types + from test.core_tests.test_scan import BinaryTest, MultiLabelTest + + BinaryTest().values_single_test() + BinaryTest().values_list_test() + BinaryTest().values_range_test() + + MultiLabelTest().values_single_test() + MultiLabelTest().values_list_test() + MultiLabelTest().values_range_test() + + # reporting specific testing + from test.core_tests.test_scan import ReportingTest, DatasetTest + + ReportingTest() + DatasetTest() + + # MOVE TO command specific tests + + # Scan() object tests scan_object = test_scan_object() - # Reporting + # reporting tests test_reporting_object(scan_object) test_params_object() test_auto_scan() test_templates() + # create a string for name of deploy file start_time = str(time.strftime("%s")) p = ta.Predict(scan_object) @@ -43,15 +61,3 @@ test_random_methods() fit_generator = generator(scan_object.x, scan_object.y, 20) force_cpu() - - TestCancer().test_scan_cancer_metric_reduction() - TestCancer().test_scan_cancer_loss_reduction() - TestCancer().test_linear_method() - TestCancer().test_reverse_method() - TestIris().test_scan_iris_explicit_validation_set() - TestIris().test_scan_iris_explicit_validation_set_force_fail() - TestIris().test_scan_iris_1() - TestIris().test_scan_iris_2() - TestIris().test_scan_iris_3() - TestReporting() - TestLoadDatasets() From cfde7d2f3f4ac8a082e50efc0599ec98b0734eee Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Sat, 2 Mar 2019 11:45:38 +0200 Subject: [PATCH 21/26] small edit to tests --- test/core_tests/test_scan.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index b8bdb1b9..be912d59 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -149,7 +149,7 @@ def values_list_test(self): x_val=self.x_val, y_val=self.y_val, params=self.values_list, - round_limit=2, + round_limit=5, dataset_name='BinaryTest', experiment_no='000', model=ta.templates.models.cervical_cancer, @@ -212,9 +212,7 @@ def values_list_test(self): x_val=self.x_val, y_val=self.y_val, params=self.values_list, - round_limit=2, - dataset_name='MultiLabelTest', - experiment_no='000', + round_limit=5, model=ta.templates.models.iris, 
random_method='crypto_uniform', seed=2423, From cdcf7e9a6e263cf25984b6aa1708f56e5ba37992 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Sat, 2 Mar 2019 11:50:43 +0200 Subject: [PATCH 22/26] fixed issue in test --- test/core_tests/test_scan.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index be912d59..9214010b 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -213,6 +213,8 @@ def values_list_test(self): y_val=self.y_val, params=self.values_list, round_limit=5, + dataset_name='MultiLabelTest', + experiment_no='000', model=ta.templates.models.iris, random_method='crypto_uniform', seed=2423, From 09ef9ca3823277ab2a1800c3da0e669c85bc4684 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Mon, 4 Mar 2019 12:39:57 +0200 Subject: [PATCH 23/26] clean up / pepify --- .gitignore | 1 + talos/commands/deploy.py | 9 ++++----- talos/commands/kerasmodel.py | 3 ++- talos/commands/predict.py | 5 ----- talos/commands/reporting.py | 3 +-- talos/commands/restore.py | 12 ++++++++++-- 6 files changed, 18 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 0aedd6eb..a15bf913 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ talos.egg-info *.h5 *.json *.npy +test.sh diff --git a/talos/commands/deploy.py b/talos/commands/deploy.py index 9e151acd..1e515070 100644 --- a/talos/commands/deploy.py +++ b/talos/commands/deploy.py @@ -12,11 +12,10 @@ class Deploy: def __init__(self, scan_object, model_name, metric='val_acc', asc=False): - '''Deploy a model to be used later or in a different system. NOTE: for a metric that is to be minimized, set asc=True or otherwise - you will end up with the model that has the highest loss. + you will end up with the model that has the highest loss. Deploy() takes in the object from Scan() and creates a package locally that can be later activated with Restore(). @@ -26,10 +25,10 @@ def __init__(self, scan_object, model_name, metric='val_acc', asc=False): model_name : str Name for the .zip file to be created. metric : str - The metric to be used for picking the best model. + The metric to be used for picking the best model. asc: bool - Make this True for metrics that are to be minimized (e.g. loss) , and - False when the metric is to be maximized (e.g. acc) + Make this True for metrics that are to be minimized (e.g. loss) , + and False when the metric is to be maximized (e.g. 
acc) ''' diff --git a/talos/commands/kerasmodel.py b/talos/commands/kerasmodel.py index 43c452f6..fb48764a 100644 --- a/talos/commands/kerasmodel.py +++ b/talos/commands/kerasmodel.py @@ -73,7 +73,8 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): activation=params['last_activation'])) # bundle the optimizer with learning rate changes - optimizer = params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])) + optimizer = params['optimizer'](lr=lr_normalizer(params['lr'], + params['optimizer'])) # compile the model model.compile(optimizer=optimizer, diff --git a/talos/commands/predict.py b/talos/commands/predict.py index 725e062f..e46926e2 100644 --- a/talos/commands/predict.py +++ b/talos/commands/predict.py @@ -1,8 +1,3 @@ -from numpy import mean, std - -from sklearn.metrics import f1_score - -from ..utils.validation_split import kfold from ..utils.best_model import best_model, activate_model diff --git a/talos/commands/reporting.py b/talos/commands/reporting.py index 5832497b..e7d1485e 100644 --- a/talos/commands/reporting.py +++ b/talos/commands/reporting.py @@ -1,11 +1,10 @@ from pandas import read_csv from ..utils.connection_check import is_connected +from ..metrics.names import metric_names if is_connected() is True: from astetik import line, hist, corr, regs, bargrid, kde, box -from ..metrics.names import metric_names - class Reporting: diff --git a/talos/commands/restore.py b/talos/commands/restore.py index ca262fc8..1b185791 100644 --- a/talos/commands/restore.py +++ b/talos/commands/restore.py @@ -8,8 +8,16 @@ class Restore: - '''Utility class for restoring the assets from Deploy() - package.''' + '''Restores the scan_object that had been stored locally as a result + of talos.Deploy(scan_object, 'example') + + USE: + + diabetes = ta.Scan(x, y, p, input_model) + ta.Deploy(diabetes, 'diabetes') + ta.Restore('diabetes.zip') + + ''' def __init__(self, path_to_zip): From 93863ec8236d67b0ae86fd188487e1b14de88f3c Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Mon, 4 Mar 2019 14:51:44 +0200 Subject: [PATCH 24/26] cleanup and fixes - moved permutation_filter to /reducers - enabled round duration and times (as attribute in Scan() object) - went through all the files for pep issues ... only some borderline long rows remain, otherwise pepified - changed max_start_... 
to time_limit for consistency with round_limit - edited tests to support the changes --- talos/commands/__init__.py | 1 - talos/commands/evaluate.py | 16 +++++++----- talos/metrics/__init__.py | 1 - talos/model/__init__.py | 1 - talos/parameters/ParamGrid.py | 7 +++--- talos/parameters/permutation_filter.py | 32 ----------------------- talos/reducers/permutation_filter.py | 35 ++++++++++++++++++++++++++ talos/reducers/sample_reducer.py | 1 - talos/scan/Scan.py | 29 ++++++++++----------- talos/scan/scan_finish.py | 19 +++++++------- talos/scan/scan_prepare.py | 13 +++++----- talos/scan/scan_round.py | 10 +++++--- talos/scan/scan_run.py | 16 ++++++------ talos/utils/connection_check.py | 2 +- talos/utils/generator.py | 5 ++-- test/core_tests/test_scan.py | 1 - 16 files changed, 96 insertions(+), 93 deletions(-) delete mode 100644 talos/parameters/permutation_filter.py create mode 100644 talos/reducers/permutation_filter.py diff --git a/talos/commands/__init__.py b/talos/commands/__init__.py index 8b137891..e69de29b 100644 --- a/talos/commands/__init__.py +++ b/talos/commands/__init__.py @@ -1 +0,0 @@ - diff --git a/talos/commands/evaluate.py b/talos/commands/evaluate.py index fecd70f6..8d11e9cc 100644 --- a/talos/commands/evaluate.py +++ b/talos/commands/evaluate.py @@ -36,19 +36,21 @@ def evaluate(self, x, y, y : array The ground truth for x model_id : int - It's possible to evaluate a specific model based on ID. Can be None. + It's possible to evaluate a specific model based on ID. + Can be None. folds : int Number of folds to use for cross-validation sort_metric : string - A column name referring to the metric that was used in the scan_object - as a performance metric. This is used for sorting the results to pick - for evaluation. + A column name referring to the metric that was used in the + scan_object as a performance metric. This is used for sorting + the results to pick for evaluation. shuffle : bool Data is shuffled before evaluation. mode : string 'binary', 'multi_class', 'multi_label', or 'regression'. asc : bool - False if the metric is to be optimized upwards (e.g. accuracy or f1_score) + False if the metric is to be optimized upwards + (e.g. accuracy or f1_score) print_out : bool Print out the results. 
@@ -78,7 +80,9 @@ def evaluate(self, x, y, if mode == 'multi_label': y_pred = model.predict(kx[i]).argmax(axis=1) - scores = f1_score(y_pred, ky[i].argmax(axis=1), average='macro') + scores = f1_score(y_pred, + ky[i].argmax(axis=1), + average='macro') elif mode == 'regression': y_pred = model.predict(kx[i]) diff --git a/talos/metrics/__init__.py b/talos/metrics/__init__.py index 8b137891..e69de29b 100644 --- a/talos/metrics/__init__.py +++ b/talos/metrics/__init__.py @@ -1 +0,0 @@ - diff --git a/talos/model/__init__.py b/talos/model/__init__.py index d9ea1050..7a3c79bb 100644 --- a/talos/model/__init__.py +++ b/talos/model/__init__.py @@ -1,4 +1,3 @@ from .early_stopper import early_stopper from .layers import hidden_layers from .normalizers import lr_normalizer - diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py index e8c5f748..1930583c 100644 --- a/talos/parameters/ParamGrid.py +++ b/talos/parameters/ParamGrid.py @@ -1,8 +1,7 @@ import numpy as np from ..reducers.sample_reducer import sample_reducer -from .round_params import create_params_dict -from .permutation_filter import permutation_filter +from ..reducers.permutation_filter import permutation_filter class ParamGrid: @@ -62,7 +61,7 @@ def __init__(self, main_self): self.param_grid = np.column_stack((self.param_grid, self.param_log)) def _create_param_grid(self, ls, final_grid_size, virtual_grid_size): - + # select permutations according to downsample if final_grid_size < virtual_grid_size: out = sample_reducer(self, final_grid_size, virtual_grid_size) @@ -71,7 +70,7 @@ def _create_param_grid(self, ls, final_grid_size, virtual_grid_size): # build the parameter permutation grid param_grid = self._create_param_permutations(ls, out) - + return param_grid def _create_param_permutations(self, ls, permutation_index): diff --git a/talos/parameters/permutation_filter.py b/talos/parameters/permutation_filter.py deleted file mode 100644 index 44dbc8c8..00000000 --- a/talos/parameters/permutation_filter.py +++ /dev/null @@ -1,32 +0,0 @@ -def permutation_filter(self, ls, final_grid_size, virtual_grid_size): - - '''Handles the filtering for ta.Scan(... permutation_filter= ...)''' - - from .round_params import create_params_dict - - # handle the filtering with the current params grid - - def fn(i): - - params_dict = create_params_dict(self, i) - fn = self.main_self.permutation_filter(params_dict) - - return fn - - grid_indices = list(filter(fn, range(len(self.param_grid)))) - self.param_grid = self.param_grid[grid_indices] - final_expanded_grid_size = final_grid_size - - while len(self.param_grid) < final_grid_size and final_expanded_grid_size < virtual_grid_size: - final_expanded_grid_size *= 2 - - if final_expanded_grid_size > virtual_grid_size: - final_expanded_grid_size = virtual_grid_size - - self.param_grid = self._create_param_grid(ls, final_expanded_grid_size, virtual_grid_size) - grid_indices=list(filter(fn, range(len(self.param_grid)))) - self.param_grid = self.param_grid[grid_indices] - - self.param_grid = self.param_grid[:final_grid_size] - - return self \ No newline at end of file diff --git a/talos/reducers/permutation_filter.py b/talos/reducers/permutation_filter.py new file mode 100644 index 00000000..a411b87e --- /dev/null +++ b/talos/reducers/permutation_filter.py @@ -0,0 +1,35 @@ +def permutation_filter(self, ls, final_grid_size, virtual_grid_size): + + '''Handles the filtering for ta.Scan(... 
permutation_filter= ...)'''
+
+    from ..parameters.round_params import create_params_dict
+
+    # handle the filtering with the current params grid
+
+    def fn(i):
+
+        params_dict = create_params_dict(self, i)
+        fn = self.main_self.permutation_filter(params_dict)
+
+        return fn
+
+    grid_indices = list(filter(fn, range(len(self.param_grid))))
+    self.param_grid = self.param_grid[grid_indices]
+    final_expanded_grid_size = final_grid_size
+
+    while len(self.param_grid) < final_grid_size and final_expanded_grid_size < virtual_grid_size:
+        final_expanded_grid_size *= 2
+
+    if final_expanded_grid_size > virtual_grid_size:
+        final_expanded_grid_size = virtual_grid_size
+
+    self.param_grid = self._create_param_grid(ls,
+                                              final_expanded_grid_size,
+                                              virtual_grid_size)
+
+    grid_indices = list(filter(fn, range(len(self.param_grid))))
+    self.param_grid = self.param_grid[grid_indices]
+
+    self.param_grid = self.param_grid[:final_grid_size]
+
+    return self
diff --git a/talos/reducers/sample_reducer.py b/talos/reducers/sample_reducer.py
index 824dbb35..e5cde054 100644
--- a/talos/reducers/sample_reducer.py
+++ b/talos/reducers/sample_reducer.py
@@ -5,7 +5,6 @@
 def sample_reducer(self, length, max_value):
-
     '''Sample Reducer (Helper)
 
     NOTE: The Scan() object is in self.main_self because
diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py
index bb1e0358..e2d557cd 100755
--- a/talos/scan/Scan.py
+++ b/talos/scan/Scan.py
@@ -7,36 +7,36 @@
 class Scan:
-    """Hyperparamater scanning and optimization
+    """Hyperparameter scanning and optimization
 
-    USE: ta.Scan(x=x, y=y, params=params_dict, model=model)
+    USE: ta.Scan(x=x, y=y, params=params_dict, model=model)
 
-    Takes in a Keras model, and a dictionary with the parameter
-    boundaries for the experiment.
+    Takes in a Keras model, and a dictionary with the parameter
+    boundaries for the experiment.
 
         p = {
            'epochs' : [50, 100, 200],
            'activation' : ['relu'],
            'dropout': (0, 0.1, 5)
        }
-
+
     Accepted input formats are [1] single value in a list, [0.1, 0.2]
-    multiple values in a list, and (0, 0.1, 5) a range of 5 values
-    from 0 to 0.1.
+    multiple values in a list, and (0, 0.1, 5) a range of 5 values
+    from 0 to 0.1.
 
     Here is an example of the input model:
 
     def model():
-        # any Keras model
+        # any Keras model
         return out, model
 
-    You must replace the parameters in the model with references to
+    You must replace the parameters in the model with references to
     the dictionary, for example:
 
     model.fit(epochs=params['epochs'])
 
-    To learn more, start from the examples and documentation
+    To learn more, start from the examples and documentation
     available here: https://github.com/autonomio/talos
@@ -84,11 +84,12 @@ def model():
         forward, and `reverse` starts at the end of the grid and moves
         backwards.
-    max_iteration_start_time : None or str
-        Allows setting a time when experiment will be completed.
+    time_limit : None or str
+        Allows setting a time when the experiment will be completed.
+        Use the format "%Y-%m-%d %H:%M" here.
     permutation_filter : lambda function
         Use it to filter permutations based on previous knowledge.
         USE: permutation_filter=lambda p: p['batch_size'] < 150
-        This example removes any permutation where batch_size is below 150
+        This example keeps only the permutations where batch_size is below 150.
     reduction_method : {None, 'correlation'}
        Method for honing in on the optimal hyperparameter subspace.
        (Default is None).
@@ -129,11 +130,11 @@ def __init__(self, x, y, params, model, val_split=.3, shuffle=True, round_limit=None, + time_limit=None, grid_downsample=1.0, random_method='uniform_mersenne', seed=None, search_method='random', - max_iteration_start_time=None, permutation_filter=None, reduction_method=None, reduction_interval=50, @@ -163,7 +164,8 @@ def __init__(self, x, y, params, model, self.shuffle = shuffle self.random_method = random_method self.search_method = search_method - self.max_iteration_start_time = max_iteration_start_time + self.round_limit = round_limit + self.time_limit = time_limit self.permutation_filter = permutation_filter self.reduction_method = reduction_method self.reduction_interval = reduction_interval @@ -172,7 +174,6 @@ def __init__(self, x, y, params, model, self.reduction_threshold = reduction_threshold self.reduction_metric = reduction_metric self.reduce_loss = reduce_loss - self.round_limit = round_limit self.debug = debug self.seed = seed self.clear_tf_session = clear_tf_session diff --git a/talos/scan/scan_finish.py b/talos/scan/scan_finish.py index bd059a03..7355ffbf 100644 --- a/talos/scan/scan_finish.py +++ b/talos/scan/scan_finish.py @@ -5,23 +5,21 @@ from ..utils.string_cols_to_numeric import string_cols_to_numeric -attrs_to_keep = ['data', 'x', 'y', 'peak_epochs_df', - 'random_method', 'grid_downsample', - 'reduction_interval', 'reduce_loss', - 'reduction_method', 'reduction_metric', - 'reduction_threshold', 'reduction_window', - 'params', 'saved_models', 'saved_weights', - 'experiment_name'] - attrs_final = ['data', 'x', 'y', 'peak_epochs_df', 'round_times', 'params', 'saved_models', 'saved_weights'] +attrs_to_keep = attrs_final + ['random_method', 'grid_downsample', + 'reduction_interval', 'reduce_loss', + 'reduction_method', 'reduction_metric', + 'reduction_threshold', 'reduction_window', + 'experiment_name'] + def scan_finish(self): # create a dataframe with permutation times self.round_times = DataFrame(self.round_times) - self.columns = ['start', 'end', 'duration'] + self.round_times.columns = ['start', 'end', 'duration'] # combine entropy tables self.peak_epochs_df['acc_epoch'] = [i[0] for i in self.epoch_entropy] @@ -47,8 +45,9 @@ def scan_finish(self): out['complete_time'] = time.strftime('%D/%H:%M') try: out['x_shape'] = self.x.shape + # for the case when x is list except AttributeError: - out['x_shape'] = list + out['x_shape'] = 'list' out['y_shape'] = self.y.shape diff --git a/talos/scan/scan_prepare.py b/talos/scan/scan_prepare.py index 2725cb5e..eed4997a 100644 --- a/talos/scan/scan_prepare.py +++ b/talos/scan/scan_prepare.py @@ -1,4 +1,5 @@ from time import strftime +from datetime import datetime from ..utils.validation_split import validation_split from ..utils.detector import prediction_type @@ -7,11 +8,6 @@ from ..utils.last_neuron import last_neuron -TRAIN_VAL_RUNTIME_ERROR_MSG = """ -If x_val or y_val is inputted, then the other must be inputted as well. 
-""" - - def scan_prepare(self): '''Includes all preparation procedures up until starting the first scan @@ -27,6 +23,11 @@ def scan_prepare(self): if self.experiment_name is None: self.experiment_name = self.dataset_name + '_' + self.experiment_no + # handle the case where a time limit is set + if self.time_limit is not None: + self._stoptime = datetime.strptime(self.time_limit, + "%Y-%m-%d %H:%M") + # create the round times list self.round_times = [] @@ -34,7 +35,7 @@ def scan_prepare(self): self.custom_val_split = False if (self.x_val is not None and self.y_val is None) or \ (self.x_val is None and self.y_val is not None): - raise RuntimeError(TRAIN_VAL_RUNTIME_ERROR_MSG) + raise RuntimeError("If x_val/y_val is inputted, other must as well.") elif (self.x_val is not None and self.y_val is not None): self.custom_val_split = True diff --git a/talos/scan/scan_round.py b/talos/scan/scan_round.py index 69a7cf4e..138f350f 100644 --- a/talos/scan/scan_round.py +++ b/talos/scan/scan_round.py @@ -24,7 +24,7 @@ def scan_round(self): print(self.round_params) # set start time - round_start = strftime('%H%M%S') + round_start = strftime('%D-%H%M%S') start = time() # fit the model @@ -37,10 +37,12 @@ def scan_round(self): print('ERROR MESSAGE : ' + err.args[0]) raise TalosReturnError("Make sure that input model returns 'out, model' where out is history object from model.fit()") + # count the duration of the round + self._round_seconds = time() - start + # set end time and log - round_end = strftime('%H%M%S') - round_seconds = time() - start - self.round_times.append([round_start, round_end, round_seconds]) + round_end = strftime('%D-%H%M%S') + self.round_times.append([round_start, round_end, self._round_seconds]) # create log and other stats try: diff --git a/talos/scan/scan_run.py b/talos/scan/scan_run.py index 765f19de..25520426 100644 --- a/talos/scan/scan_run.py +++ b/talos/scan/scan_run.py @@ -1,5 +1,4 @@ from tqdm import tqdm - from datetime import datetime from ..utils.results import result_todf, peak_epochs_todf @@ -11,20 +10,19 @@ def scan_run(self): '''The high-level management of the scan procedures onwards from preparation. Manages round_run()''' - - if self.max_iteration_start_time != None: - stoptime=datetime.strptime(self.max_iteration_start_time,"%Y-%m-%d %H:%M") - # main loop for the experiment - # NOTE: the progress bar is also updated on line 73 + # initiate the progress bar self.pbar = tqdm(total=len(self.param_log), disable=self.disable_progress_bar) + + # start the main loop of the program while len(self.param_log) != 0: self = scan_round(self) self.pbar.update(1) - if self.max_iteration_start_time != None and datetime.now() > stoptime: - print("Time limit reached, experiment finished") - break + if self.time_limit is not None: + if datetime.now() > self._stoptime: + print("Time limit reached, experiment finished") + break self.pbar.close() # save the results diff --git a/talos/utils/connection_check.py b/talos/utils/connection_check.py index 8e307e8a..f7fa5fb3 100644 --- a/talos/utils/connection_check.py +++ b/talos/utils/connection_check.py @@ -7,4 +7,4 @@ def is_connected(): return True except OSError: pass - return False \ No newline at end of file + return False diff --git a/talos/utils/generator.py b/talos/utils/generator.py index 8b010b70..26e3f927 100644 --- a/talos/utils/generator.py +++ b/talos/utils/generator.py @@ -1,7 +1,8 @@ import numpy as np + def generator(x, y, batch_size): - + '''Creates a data generator for Keras fit_generator(). 
''' samples_per_epoch = x.shape[0] @@ -17,4 +18,4 @@ def generator(x, y, batch_size): yield x_batch, y_batch if counter >= number_of_batches: - counter = 0 \ No newline at end of file + counter = 0 diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index 9214010b..74b08b5f 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -219,7 +219,6 @@ def values_list_test(self): random_method='crypto_uniform', seed=2423, search_method='linear', - max_iteration_start_time=None, permutation_filter=lambda p: p['first_neuron'] * p['hidden_layers'] < 9, reduction_method='correlation', reduction_interval=2, From 32aafd010aa4d6aaee98b0a792949eff93a1f3b0 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Mon, 4 Mar 2019 15:17:55 +0200 Subject: [PATCH 25/26] added tools to utils - added gpu_utils and generator to ta.utils... - added tests for generator.py --- talos/utils/__init__.py | 2 ++ talos/utils/generator.py | 5 ++--- talos/utils/gpu_utils.py | 13 ++++++++----- test_script.py | 4 ++-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/talos/utils/__init__.py b/talos/utils/__init__.py index adf69eb2..6e59bec5 100644 --- a/talos/utils/__init__.py +++ b/talos/utils/__init__.py @@ -8,4 +8,6 @@ from ..model.normalizers import lr_normalizer from ..model.layers import hidden_layers from ..model.early_stopper import early_stopper +from .generator import generator +from . import gpu_utils import talos.metrics.keras_metrics as metrics diff --git a/talos/utils/generator.py b/talos/utils/generator.py index 26e3f927..46e5f171 100644 --- a/talos/utils/generator.py +++ b/talos/utils/generator.py @@ -1,10 +1,9 @@ -import numpy as np - - def generator(x, y, batch_size): '''Creates a data generator for Keras fit_generator(). 
'''
 
+    import numpy as np
+
     samples_per_epoch = x.shape[0]
     number_of_batches = samples_per_epoch / batch_size
     counter = 0
diff --git a/talos/utils/gpu_utils.py b/talos/utils/gpu_utils.py
index cfbf7b83..72f6b7c0 100644
--- a/talos/utils/gpu_utils.py
+++ b/talos/utils/gpu_utils.py
@@ -1,8 +1,3 @@
-import tensorflow as tf
-import keras.backend as K
-from keras.utils import multi_gpu_model
-
-
 def parallel_gpu_jobs(allow_growth=True, fraction=.5):
 
     '''Sets the max used memory as a fraction for tensorflow
@@ -14,6 +9,9 @@ def parallel_gpu_jobs(allow_growth=True, fraction=.5):
 
     '''
 
+    import keras.backend as K
+    import tensorflow as tf
+
     gpu_options = K.tf.GPUOptions(allow_growth=allow_growth,
                                   per_process_gpu_memory_fraction=fraction)
     config = tf.ConfigProto(gpu_options=gpu_options)
@@ -35,6 +33,8 @@ def multi_gpu(model, gpus=None, cpu_merge=True, cpu_relocation=False):
 
     '''
 
+    from keras.utils import multi_gpu_model
+
     return multi_gpu_model(model,
                            gpus=gpus,
                            cpu_merge=cpu_merge,
@@ -46,6 +46,9 @@ def force_cpu():
 
     '''Force CPU on a GPU system
     '''
 
+    import keras.backend as K
+    import tensorflow as tf
+
     config = tf.ConfigProto(device_count={'GPU': 0})
     session = tf.Session(config=config)
     K.set_session(session)
diff --git a/test_script.py b/test_script.py
index a88bdf04..154506f5 100644
--- a/test_script.py
+++ b/test_script.py
@@ -31,7 +31,7 @@
 
 # reporting specific testing
 from test.core_tests.test_scan import ReportingTest, DatasetTest
-
+
 ReportingTest()
 DatasetTest()
@@ -59,5 +59,5 @@
     ta.Restore(start_time + '.zip')
     test_random_methods()
-    fit_generator = generator(scan_object.x, scan_object.y, 20)
+    fit_generator = ta.utils.generator(scan_object.x, scan_object.y, 20)
     force_cpu()

From 62d4848697b61ae35cf34f5095b2a82ec6a19449 Mon Sep 17 00:00:00 2001
From: Mikko Kotila
Date: Tue, 5 Mar 2019 12:53:05 +0200
Subject: [PATCH 26/26] fixed #257

- fixed an issue with permutation_filter that resulted from code refactoring
- pepified ParamGrid and permutation_filter.py

---
 talos/parameters/ParamGrid.py        |  9 +++++++--
 talos/reducers/permutation_filter.py | 14 +++++++-------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py
index 1930583c..f3958749 100644
--- a/talos/parameters/ParamGrid.py
+++ b/talos/parameters/ParamGrid.py
@@ -44,11 +44,16 @@ def __init__(self, main_self):
         final_grid_size = min(final_grid_size, self.main_self.round_limit)
 
     # create the params grid
-    self.param_grid = self._create_param_grid(ls, final_grid_size, virtual_grid_size)
+    self.param_grid = self._create_param_grid(ls,
+                                              final_grid_size,
+                                              virtual_grid_size)
 
     # handle the case where permutation filter is provided
     if self.main_self.permutation_filter is not None:
-        self = permutation_filter(self, ls, final_grid_size, virtual_grid_size)
+        self = permutation_filter(self,
+                                  ls,
+                                  final_grid_size,
+                                  virtual_grid_size)
 
     # initialize with random shuffle if needed
     if self.main_self.shuffle:
diff --git a/talos/reducers/permutation_filter.py b/talos/reducers/permutation_filter.py
index a411b87e..9c26a41a 100644
--- a/talos/reducers/permutation_filter.py
+++ b/talos/reducers/permutation_filter.py
@@ -20,15 +20,15 @@ def fn(i):
     while len(self.param_grid) < final_grid_size and final_expanded_grid_size < virtual_grid_size:
         final_expanded_grid_size *= 2
 
-    if final_expanded_grid_size > virtual_grid_size:
-        final_expanded_grid_size = virtual_grid_size
+        if final_expanded_grid_size > virtual_grid_size:
+            final_expanded_grid_size = virtual_grid_size
 
-    self.param_grid = self._create_param_grid(ls,
-                                              final_expanded_grid_size,
-                                              virtual_grid_size)
+        self.param_grid = self._create_param_grid(ls,
+                                                  final_expanded_grid_size,
+                                                  virtual_grid_size)
 
-    grid_indices = list(filter(fn, range(len(self.param_grid))))
-    self.param_grid = self.param_grid[grid_indices]
+        grid_indices = list(filter(fn, range(len(self.param_grid))))
+        self.param_grid = self.param_grid[grid_indices]
 
     self.param_grid = self.param_grid[:final_grid_size]
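
For reference, the user-facing API that the patches above leave in place can be exercised as below. This is a minimal sketch, not part of the patches themselves: the toy data and the model function are hypothetical stand-ins, while time_limit and permutation_filter are used as documented in the Scan() docstring and implemented in scan_prepare() and scan_run().

    import numpy as np
    import talos as ta
    from keras.models import Sequential
    from keras.layers import Dense

    # toy data; stand-in for a real dataset
    x = np.random.random((100, 8))
    y = np.random.randint(0, 2, (100, 1))

    def model(x_train, y_train, x_val, y_val, params):
        # any Keras model that reads its hyperparameters from `params`
        m = Sequential()
        m.add(Dense(params['first_neuron'], input_dim=8, activation='relu'))
        m.add(Dense(1, activation='sigmoid'))
        m.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
        out = m.fit(x_train, y_train,
                    batch_size=params['batch_size'],
                    epochs=params['epochs'],
                    validation_data=(x_val, y_val),
                    verbose=0)
        return out, m

    p = {'first_neuron': [4, 8, 16],
         'batch_size': [16, 32],
         'epochs': [5]}

    scan_object = ta.Scan(x=x, y=y, params=p, model=model,
                          # stop the experiment at a wall-clock deadline,
                          # parsed in scan_prepare() with "%Y-%m-%d %H:%M"
                          time_limit='2019-03-06 18:00',
                          # run only permutations for which this returns True
                          permutation_filter=lambda p: p['batch_size'] < 150)

Because scan_run() checks the deadline only between rounds, a long-running round can overshoot time_limit before the experiment stops.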