Merge pull request #588 from autonomio/582_support_multi_input
582 support multi input
mikkokotila committed Apr 23, 2022
2 parents f68c057 + 860ea30 commit db09756
Showing 20 changed files with 145 additions and 89 deletions.
2 changes: 2 additions & 0 deletions docs/Examples_Multiple_Inputs.md
@@ -24,6 +24,8 @@ x_train, y_train, x_val, y_val = wrangle.array_split(x, y, .5)
```
For multi-input models, the data must be split into training and validation datasets before it is passed to `Scan()`: `x` is expected to be a list of numpy arrays and `y` a single numpy array.

**NOTE:** For full support of Talos features for multi-input models, set `Scan(...multi_input=True...)`.
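
A minimal end-to-end sketch (the two-branch model, the data shapes, and the parameter values below are illustrative, not part of the Talos docs):

```python
import numpy as np
import talos
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, concatenate

# two illustrative input branches and a binary target
x1 = np.random.random((100, 8))
x2 = np.random.random((100, 4))
y = np.random.randint(0, 2, (100, 1))

# split every input array before Scan(): x is a list, y a single array
split = 50
x_train = [x1[:split], x2[:split]]
x_val = [x1[split:], x2[split:]]
y_train, y_val = y[:split], y[split:]

def multi_input_model(x_train, y_train, x_val, y_val, params):

    in1 = Input(shape=(8,))
    in2 = Input(shape=(4,))
    merged = concatenate([Dense(params['neurons'], activation='relu')(in1),
                          Dense(params['neurons'], activation='relu')(in2)])
    out = Dense(1, activation='sigmoid')(merged)

    model = Model([in1, in2], out)
    model.compile(optimizer='adam', loss='binary_crossentropy')

    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    return history, model

p = {'neurons': [8, 16],
     'batch_size': [10, 20],
     'epochs': [5]}

scan_object = talos.Scan(x=x_train,
                         y=y_train,
                         x_val=x_val,
                         y_val=y_val,
                         model=multi_input_model,
                         params=p,
                         multi_input=True,
                         experiment_name='multi_input_example')
```

With `multi_input=True` set, downstream features introduced in this PR, such as k-fold evaluation, handle the list-of-arrays input correctly.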

### Defining the Model
```python

31 changes: 21 additions & 10 deletions docs/Examples_Typical_Code.md
@@ -3,18 +3,23 @@
# Typical Case Example

```python
import talos as ta
import talos
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

x, y = ta.templates.datasets.iris()
x, y = talos.templates.datasets.iris()

# define the model
def iris_model(x_train, y_train, x_val, y_val, params):

model = Sequential()

model.add(Dense(32, input_dim=4, activation=params['activation']))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer=params['optimizer'], loss=params['losses'])

model.compile(optimizer=params['optimizer'],
loss=params['losses'],
metrics=[talos.utils.metrics.f1score])

out = model.fit(x_train, y_train,
batch_size=params['batch_size'],
@@ -24,14 +29,20 @@ def iris_model(x_train, y_train, x_val, y_val, params):

return out, model

# set the parameter space boundaries
p = {'activation':['relu', 'elu'],
'optimizer': ['Nadam', 'Adam'],
'losses': ['logcosh'],
'hidden_layers':[0, 1, 2],
'batch_size': (20, 50, 5),
'epochs': [10, 20]}

scan_object = ta.Scan(x, y, model=iris_model, params=p, fraction_limit=0.1)
'optimizer': ['Nadam', 'Adam'],
'losses': ['categorical_crossentropy'],
'epochs': [100, 200],
'batch_size': [4, 6, 8]}

# start the experiment
scan_object = talos.Scan(x=x,
y=y,
model=iris_model,
params=p,
experiment_name='iris',
round_limit=20)
```

`Scan()` always needs to have `x`, `y`, `model`, and `params` arguments declared. Find the description for all `Scan()` arguments [here](Scan.md#scan-arguments).
1 change: 1 addition & 0 deletions docs/Predict.md
@@ -34,5 +34,6 @@ Parameter | Default | Description
`model_id` | None | the model_id to be used
`metric` | None | the metric against which the validation is performed
`asc` | None | should be True if metric is a loss
`task` | NA | one of the following strings: 'binary' or 'multi_label'
`saved` | bool | if a model saved on local machine should be used
`custom_objects` | dict | if the model has a custom object, pass it here
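
A usage sketch for the new `task` argument (assuming a completed `scan_object`; the metric name is illustrative):

```python
import talos

# pick the best model by validation accuracy, then predict classes;
# 'binary' thresholds sigmoid output at 0.5
predict_object = talos.Predict(scan_object)
classes = predict_object.predict_classes(x=x_val,
                                         metric='val_acc',
                                         asc=False,
                                         task='binary')
```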
1 change: 1 addition & 0 deletions docs/Scan.md
@@ -23,6 +23,7 @@ Argument | Input | Description
`x_val` | array or list of arrays | validation data for x
`y_val` | array or list of arrays | validation data for y
`val_split` | float | validation data split ratio
`multi_input` | bool | set to True for multi-input models
`random_method` | str | the random method to be used
`seed` | float | Seed for random states
`performance_target` | list | A result at which point to end experiment
2 changes: 1 addition & 1 deletion docs/_coverpage.md
@@ -1,6 +1,6 @@
![logo](_media/talos_logo_bg.png)

## v1.2.5
## v1.3

> Hyperparameter Experiments with Tensorflow, PyTorch and Keras
2 changes: 1 addition & 1 deletion docs/index.html
@@ -16,7 +16,7 @@
<div id="app"></div>
<script>
window.$docsify = {
name: 'Talos 1.2.5',
name: 'Talos 1.3',
repo: 'https://github.com/autonomio/talos',
coverpage: true,
loadSidebar: true,
@@ -80,6 +80,9 @@
"source": [
"# first we have to make sure to input data and params into the function\n",
"def breast_cancer_model(x_train, y_train, x_val, y_val, params):\n",
" \n",
" from tensorflow.keras.layers import Dense, Dropout\n",
" from tensorflow.keras.models import Sequential\n",
"\n",
" model = Sequential()\n",
" model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1],\n",
@@ -98,7 +101,7 @@
" history = model.fit(x_train, y_train, \n",
" validation_data=[x_val, y_val],\n",
" batch_size=params['batch_size'],\n",
" callbacks=[talos.callbacks.TrainingPlot()],\n",
" callbacks=[talos.callbacks.TrainingPlot(metrics=['f1score'])],\n",
" epochs=params['epochs'],\n",
" verbose=0)\n",
"\n",
@@ -119,7 +122,7 @@
"execution_count": null,
"source": [
"# then we can go ahead and set the parameter space\n",
"p = {'first_neuron':[9,10,11],\n",
"p = {'first_neuron':[9, 10, 11],\n",
" 'hidden_layers':[0, 1, 2],\n",
" 'batch_size': [30],\n",
" 'epochs': [100],\n",
@@ -150,7 +153,8 @@
" model=breast_cancer_model,\n",
" params=p,\n",
" experiment_name='breast_cancer',\n",
" round_limit=10)"
" round_limit=50,\n",
" disable_progress_bar=True)"
],
"outputs": [],
"metadata": {
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@
URL = 'http://autonom.io'
LICENSE = 'MIT'
DOWNLOAD_URL = 'https://github.com/autonomio/talos/'
VERSION = '1.2.5'
VERSION = '1.3'


try:
2 changes: 1 addition & 1 deletion talos/__init__.py
@@ -35,4 +35,4 @@
del commands, scan, model, metrics, key
del sub, keep_from_templates, template_sub, warnings

__version__ = "1.2.5"
__version__ = "1.3"
4 changes: 3 additions & 1 deletion talos/commands/evaluate.py
@@ -22,6 +22,7 @@ def evaluate(self,
asc=False,
saved=False,
custom_objects=None,
multi_input=False,
print_out=False):

'''Evaluate a model based on f1_score (all except regression)
@@ -44,6 +45,7 @@
(e.g. accuracy or f1_score)
saved | bool | if a model saved on local machine should be used
custom_objects | dict | if the model has a custom object, pass it here
multi_input | bool | set to True when evaluating a multi-input model
print_out | bool | Print out the results.
TODO: add possibility to input custom metrics.
@@ -65,7 +67,7 @@
custom_objects=custom_objects)

from ..utils.validation_split import kfold
kx, ky = kfold(x, y, folds, shuffle)
kx, ky = kfold(x, y, folds, shuffle, multi_input)

for i in range(folds):

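
A usage sketch for the new flag (`scan_object`, the metric, and the task value are illustrative assumptions):

```python
import talos

# k-fold cross-validation of the best model from a multi-input experiment
evaluate_object = talos.Evaluate(scan_object)
results = evaluate_object.evaluate(x=x_val,
                                   y=y_val,
                                   task='binary',
                                   metric='val_acc',
                                   folds=5,
                                   multi_input=True)
```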
15 changes: 12 additions & 3 deletions talos/commands/predict.py
@@ -48,6 +48,7 @@ def predict_classes(self,
x,
metric,
asc,
task,
model_id=None,
saved=False,
custom_objects=None):
@@ -59,6 +60,7 @@
model_id | int | the id of the model from the Scan() object
metric | str | the metric to be used for picking best model
asc | bool | True if `metric` is something to be minimized
task | string | 'binary' or 'multi_label'
saved | bool | if a model saved on local machine should be used
custom_objects | dict | if the model has a custom object, pass it here
'''
@@ -76,7 +78,14 @@
custom_objects)

# make (class) predictions with the model
preds = model.predict(x)
preds_classes = np.argmax(preds, axis=1)
preds = model.predict(x)

return preds_classes
if task == 'binary':
return np.where(preds >= 0.5, 1, 0)

elif task == 'multi_label':
return np.argmax(preds, 1)

else:
msg = 'Only `binary` and `multi_label` are supported'
raise AttributeError(msg)
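
A standalone numpy illustration of the two branches (not part of the diff):

```python
import numpy as np

# 'binary': a single sigmoid column, thresholded at 0.5
preds = np.array([[0.2], [0.8], [0.5]])
print(np.where(preds >= 0.5, 1, 0))   # [[0] [1] [1]]

# 'multi_label': one column per class, the highest probability wins
preds = np.array([[0.1, 0.7, 0.2]])
print(np.argmax(preds, 1))            # [1]
```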
5 changes: 4 additions & 1 deletion talos/scan/Scan.py
@@ -59,6 +59,8 @@ def model():
val_split : float, optional
The proportion of the input `x` which is set aside as the
validation data. (Default is 0.3).
multi_input : bool, optional
If the model takes multiple inputs, set to True. (Default is False).
# RANDOMNESS ARGUMENTS
----------------------
@@ -140,6 +142,7 @@ def __init__(self,
x_val=None,
y_val=None,
val_split=.3,
multi_input=False,
random_method='uniform_mersenne',
seed=None,
performance_target=None,
@@ -167,6 +170,7 @@
self.x_val = x_val
self.y_val = y_val
self.val_split = val_split
self.multi_input = multi_input

# randomness
self.random_method = random_method
@@ -195,7 +199,6 @@
self.clear_session = clear_session
self.save_weights = save_weights
self.save_models = save_models
# input parameters section ends

# start runtime
from .scan_run import scan_run
4 changes: 2 additions & 2 deletions talos/scan/scan_finish.py
@@ -45,12 +45,12 @@ def scan_finish(self):
try:
out['x_shape'] = self.x.shape
except AttributeError:
out['x_shape'] = 'list'
out['x_shape'] = 'multi-input'

try:
out['y_shape'] = self.y.shape
except AttributeError:
out['y_shape'] = 'list'
out['y_shape'] = 'multi-input'

# final cleanup
keys = list(self.__dict__.keys())
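
The try/except above works because a single numpy array exposes `.shape` while a multi-input list of arrays does not — a standalone illustration:

```python
import numpy as np

x_single = np.zeros((100, 4))
x_multi = [np.zeros((100, 4)), np.zeros((100, 2))]

print(x_single.shape)              # (100, 4) -> logged as the shape
print(hasattr(x_multi, 'shape'))   # False -> logged as 'multi-input'
```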
7 changes: 4 additions & 3 deletions talos/templates/models.py
@@ -103,9 +103,10 @@ def titanic(x_train, y_train, x_val, y_val, params):
metrics=['acc'])

# here we are also using the early_stopper function for a callback
out = model.fit(x_train, y_train,
out = model.fit(x=x_train,
y=y_train,
batch_size=params['batch_size'],
epochs=2,
epochs=params['epochs'],
verbose=0,
validation_data=(x_val, y_val))

@@ -128,7 +129,7 @@ def iris(x_train, y_train, x_val, y_val, params):
model.add(Dropout(params['dropout']))

# with this call we can create any number of hidden layers
hidden_layers(model, params, y_train.shape[1])
hidden_layers(model, params, x_train.shape[1])

# again, instead of the activation name, we have a dictionary entry
model.add(Dense(y_train.shape[1],
27 changes: 23 additions & 4 deletions talos/templates/params.py
@@ -1,15 +1,34 @@
def titanic():
def titanic(debug=False):

from tensorflow.keras.optimizers import Adam, Nadam

# here use a standard 2d dictionary for inputting the param boundaries
p = {'lr': (0.5, 5, 10),
'first_neuron': [4, 8, 16],
'batch_size': [20, 30, 40],
'dropout': (0, 0.5, 5),
'optimizer': ['Adam', 'Nadam'],
'optimizer': [Adam(), Nadam()],
'epochs': [50, 100, 150],
'losses': ['logcosh', 'binary_crossentropy'],
'shapes': ['brick', 'triangle', 0.2],
'hidden_layers': [0, 1, 2, 3, 4],
'activation': ['relu', 'elu'],
'last_activation': ['sigmoid']}

if debug:

p = {'lr': [0.1, 0.2],
'first_neuron': [4, 8],
'batch_size': [20, 30],
'dropout': [0.2, 0.3],
'optimizer': [Adam(), Nadam()],
'epochs': [50, 100],
'losses': ['logcosh', 'binary_crossentropy'],
'shapes': ['brick', 'triangle', 0.2],
'hidden_layers': [0, 1],
'activation': ['relu', 'elu'],
'last_activation': ['sigmoid']}

return p


@@ -24,10 +43,10 @@ def iris():
'first_neuron': [4, 8, 16, 32, 64],
'hidden_layers': [0, 1, 2, 3, 4],
'batch_size': (2, 30, 10),
'epochs': [2],
'epochs': [50, 100, 150],
'dropout': (0, 0.5, 5),
'weight_regulizer': [None],
'emb_output_dims': [None],
'emb_output_dims': [None],
'shapes': ['brick', 'triangle', 0.2],
'optimizer': [Adam, Nadam],
'losses': [logcosh, categorical_crossentropy],
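
A usage note for the new `debug` flag (a sketch; both calls assume only the template API shown above):

```python
import talos

p_full = talos.templates.params.titanic()             # full parameter boundaries
p_debug = talos.templates.params.titanic(debug=True)  # reduced space for quick smoke tests
```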
8 changes: 4 additions & 4 deletions talos/templates/pipelines.py
@@ -40,13 +40,13 @@ def iris(round_limit=2, random_method='uniform_mersenne'):
return scan_object


def titanic(round_limit=2, random_method='uniform_mersenne'):
def titanic(round_limit=2, random_method='uniform_mersenne', debug=False):

'''Performs a Scan with the Titanic dataset and a simple dense net'''
import talos as ta
scan_object = ta.Scan(ta.templates.datasets.titanic()[0][:50],
ta.templates.datasets.titanic()[1][:50],
ta.templates.params.titanic(),
scan_object = ta.Scan(ta.templates.datasets.titanic()[0],
ta.templates.datasets.titanic()[1],
ta.templates.params.titanic(debug),
ta.templates.models.titanic,
'test',
random_method=random_method,
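
With `debug` threaded through the pipeline, a quick smoke-test run looks like this (a sketch assuming the defaults above):

```python
import talos

# runs the whole titanic pipeline on the reduced debug parameter space
scan_object = talos.templates.pipelines.titanic(round_limit=2, debug=True)
```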
7 changes: 5 additions & 2 deletions talos/utils/recover_best_model.py
@@ -5,6 +5,7 @@ def recover_best_model(x_train,
experiment_log,
input_model,
metric,
multi_input=False,
x_cross=None,
y_cross=None,
n_models=5,
Expand All @@ -19,6 +20,7 @@ def recover_best_model(x_train,
experiment_log | str | path to the Talos experiment log
input_model | function | model used in the experiment
metric | str | use this metric to pick evaluation candidates
multi_input | bool | set to True if multi-input model
x_cross | array | data for the cross-validation, or None to use x_val
y_cross | array | data for the cross-validation, or None to use y_val
n_models | int | number of models to cross-validate
@@ -50,13 +52,14 @@
for i in range(n_models):

# get the params for the model and train it
params = df.sort_values(metric, ascending=False).drop(metric, 1).iloc[i].to_dict()
params = df.sort_values(metric, ascending=False)
params = params.drop(metric, 1).iloc[i].to_dict()
_history, model = input_model(x_train, y_train, x_val, y_val, params)

# start kfold cross-validation
out = []
folds = 5
kx, ky = kfold(x_cross, y_cross, folds, True)
kx, ky = kfold(x_cross, y_cross, folds, True, multi_input)

for i in range(folds):

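
A usage sketch based on the signature above (the log path, `iris_model`, and the unpacking of the return value are assumptions):

```python
from talos.utils.recover_best_model import recover_best_model

# cross-validate the five best models from an experiment log
results, models = recover_best_model(x_train, y_train,
                                     x_val, y_val,
                                     experiment_log='iris/experiment_log.csv',
                                     input_model=iris_model,
                                     metric='val_acc',
                                     multi_input=False,
                                     n_models=5)
```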
