Merge pull request #588 from autonomio/582_support_multi_input
582 support multi input
mikkokotila committed Apr 23, 2022
2 parents f68c057 + 860ea30 commit db09756
Showing 20 changed files with 145 additions and 89 deletions.
2 changes: 2 additions & 0 deletions docs/Examples_Multiple_Inputs.md
@@ -24,6 +24,8 @@ x_train, y_train, x_val, y_val = wrangle.array_split(x, y, .5)
```
For multi-input models, the data must be split into training and validation datasets before it is passed to `Scan()`: `x` is expected to be a list of numpy arrays and `y` a single numpy array.

**NOTE:** For full support of Talos features for multi-input models, set `Scan(...multi_input=True...)`.
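
A minimal end-to-end sketch (the two-branch model, the data shapes, and the parameter values below are illustrative, not part of the Talos docs):

```python
import numpy as np
import talos
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, concatenate

# two illustrative input branches and a binary target
x1 = np.random.random((100, 8))
x2 = np.random.random((100, 4))
y = np.random.randint(0, 2, (100, 1))

# split every input array before Scan(): x is a list, y a single array
split = 50
x_train = [x1[:split], x2[:split]]
x_val = [x1[split:], x2[split:]]
y_train, y_val = y[:split], y[split:]

def multi_input_model(x_train, y_train, x_val, y_val, params):

    in1 = Input(shape=(8,))
    in2 = Input(shape=(4,))
    merged = concatenate([Dense(params['neurons'], activation='relu')(in1),
                          Dense(params['neurons'], activation='relu')(in2)])
    out = Dense(1, activation='sigmoid')(merged)

    model = Model([in1, in2], out)
    model.compile(optimizer='adam', loss='binary_crossentropy')

    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    return history, model

p = {'neurons': [8, 16],
     'batch_size': [10, 20],
     'epochs': [5]}

scan_object = talos.Scan(x=x_train,
                         y=y_train,
                         x_val=x_val,
                         y_val=y_val,
                         model=multi_input_model,
                         params=p,
                         multi_input=True,
                         experiment_name='multi_input_example')
```

With `multi_input=True` set, downstream features introduced in this PR, such as k-fold evaluation, handle the list-of-arrays input correctly.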

### Defining the Model
```python

31 changes: 21 additions & 10 deletions docs/Examples_Typical_Code.md
@@ -3,18 +3,23 @@
# Typical Case Example

```python
import talos as ta
import talos
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

x, y = ta.templates.datasets.iris()
x, y = talos.templates.datasets.iris()

# define the model
def iris_model(x_train, y_train, x_val, y_val, params):

model = Sequential()

model.add(Dense(32, input_dim=4, activation=params['activation']))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer=params['optimizer'], loss=params['losses'])

model.compile(optimizer=params['optimizer'],
loss=params['losses'],
metrics=[talos.utils.metrics.f1score])

out = model.fit(x_train, y_train,
batch_size=params['batch_size'],
@@ -24,14 +29,20 @@ def iris_model(x_train, y_train, x_val, y_val, params):

return out, model

# set the parameter space boundaries
p = {'activation':['relu', 'elu'],
'optimizer': ['Nadam', 'Adam'],
'losses': ['logcosh'],
'hidden_layers':[0, 1, 2],
'batch_size': (20, 50, 5),
'epochs': [10, 20]}

scan_object = ta.Scan(x, y, model=iris_model, params=p, fraction_limit=0.1)
'optimizer': ['Nadam', 'Adam'],
'losses': ['categorical_crossentropy'],
'epochs': [100, 200],
'batch_size': [4, 6, 8]}

# start the experiment
scan_object = talos.Scan(x=x,
y=y,
model=iris_model,
params=p,
experiment_name='iris',
round_limit=20)
```

`Scan()` always needs to have `x`, `y`, `model`, and `params` arguments declared. Find the description for all `Scan()` arguments [here](Scan.md#scan-arguments).
1 change: 1 addition & 0 deletions docs/Predict.md
@@ -34,5 +34,6 @@ Parameter | Default | Description
`model_id` | None | the model_id to be used
`metric` | None | the metric against which the validation is performed
`asc` | None | should be True if metric is a loss
`task` | NA | one of the following strings: 'binary' or 'multi_label'
`saved` | bool | if a model saved on local machine should be used
`custom_objects` | dict | if the model has a custom object, pass it here
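
A usage sketch for the new `task` argument (assuming a completed `scan_object`; the metric name is illustrative):

```python
import talos

# pick the best model by validation accuracy, then predict classes;
# 'binary' thresholds sigmoid output at 0.5
predict_object = talos.Predict(scan_object)
classes = predict_object.predict_classes(x=x_val,
                                         metric='val_acc',
                                         asc=False,
                                         task='binary')
```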
1 change: 1 addition & 0 deletions docs/Scan.md
@@ -23,6 +23,7 @@ Argument | Input | Description
`x_val` | array or list of arrays | validation data for x
`y_val` | array or list of arrays | validation data for y
`val_split` | float | validation data split ratio
`multi_input` | bool | set to True for multi-input models
`random_method` | str | the random method to be used
`seed` | float | Seed for random states
`performance_target` | list | A result at which point to end experiment
2 changes: 1 addition & 1 deletion docs/_coverpage.md
@@ -1,6 +1,6 @@
![logo](_media/talos_logo_bg.png)

## v1.2.5
## v1.3

> Hyperparameter Experiments with Tensorflow, PyTorch and Keras
2 changes: 1 addition & 1 deletion docs/index.html
@@ -16,7 +16,7 @@
<div id="app"></div>
<script>
window.$docsify = {
name: 'Talos 1.2.5',
name: 'Talos 1.3',
repo: 'https://github.com/autonomio/talos',
coverpage: true,
loadSidebar: true,
@@ -80,6 +80,9 @@
"source": [
"# first we have to make sure to input data and params into the function\n",
"def breast_cancer_model(x_train, y_train, x_val, y_val, params):\n",
" \n",
" from tensorflow.keras.layers import Dense, Dropout\n",
" from tensorflow.keras.models import Sequential\n",
"\n",
" model = Sequential()\n",
" model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1],\n",
@@ -98,7 +101,7 @@
" history = model.fit(x_train, y_train, \n",
" validation_data=[x_val, y_val],\n",
" batch_size=params['batch_size'],\n",
" callbacks=[talos.callbacks.TrainingPlot()],\n",
" callbacks=[talos.callbacks.TrainingPlot(metrics=['f1score'])],\n",
" epochs=params['epochs'],\n",
" verbose=0)\n",
"\n",
@@ -119,7 +122,7 @@
"execution_count": null,
"source": [
"# then we can go ahead and set the parameter space\n",
"p = {'first_neuron':[9,10,11],\n",
"p = {'first_neuron':[9, 10, 11],\n",
" 'hidden_layers':[0, 1, 2],\n",
" 'batch_size': [30],\n",
" 'epochs': [100],\n",
@@ -150,7 +153,8 @@
" model=breast_cancer_model,\n",
" params=p,\n",
" experiment_name='breast_cancer',\n",
" round_limit=10)"
" round_limit=50,\n",
" disable_progress_bar=True)"
],
"outputs": [],
"metadata": {
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@
URL = 'http://autonom.io'
LICENSE = 'MIT'
DOWNLOAD_URL = 'https://github.com/autonomio/talos/'
VERSION = '1.2.5'
VERSION = '1.3'


try:
2 changes: 1 addition & 1 deletion talos/__init__.py
@@ -35,4 +35,4 @@
del commands, scan, model, metrics, key
del sub, keep_from_templates, template_sub, warnings

__version__ = "1.2.5"
__version__ = "1.3"
4 changes: 3 additions & 1 deletion talos/commands/evaluate.py
@@ -22,6 +22,7 @@ def evaluate(self,
asc=False,
saved=False,
custom_objects=None,
multi_input=False,
print_out=False):

'''Evaluate a model based on f1_score (all except regression)
@@ -44,6 +45,7 @@
(e.g. accuracy or f1_score)
saved | bool | if a model saved on local machine should be used
custom_objects | dict | if the model has a custom object, pass it here
multi_input | bool | set to True when evaluating a multi-input model
print_out | bool | Print out the results.
TODO: add possibility to input custom metrics.
@@ -65,7 +67,7 @@
custom_objects=custom_objects)

from ..utils.validation_split import kfold
kx, ky = kfold(x, y, folds, shuffle)
kx, ky = kfold(x, y, folds, shuffle, multi_input)

for i in range(folds):

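
A usage sketch for the new flag (`scan_object`, the metric, and the task value are illustrative assumptions):

```python
import talos

# k-fold cross-validation of the best model from a multi-input experiment
evaluate_object = talos.Evaluate(scan_object)
results = evaluate_object.evaluate(x=x_val,
                                   y=y_val,
                                   task='binary',
                                   metric='val_acc',
                                   folds=5,
                                   multi_input=True)
```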
15 changes: 12 additions & 3 deletions talos/commands/predict.py
@@ -48,6 +48,7 @@ def predict_classes(self,
x,
metric,
asc,
task,
model_id=None,
saved=False,
custom_objects=None):
@@ -59,6 +60,7 @@
model_id | int | the id of the model from the Scan() object
metric | str | the metric to be used for picking best model
asc | bool | True if `metric` is something to be minimized
task | string | 'binary' or 'multi_label'
saved | bool | if a model saved on local machine should be used
custom_objects | dict | if the model has a custom object, pass it here
'''
@@ -76,7 +78,14 @@
custom_objects)

# make (class) predictions with the model
preds = model.predict(x)
preds_classes = np.argmax(preds, axis=1)
preds = model.predict(x)

return preds_classes
if task == 'binary':
return np.where(preds >= 0.5, 1, 0)

elif task == 'multi_label':
return np.argmax(preds, 1)

else:
msg = 'Only `binary` and `multi_label` are supported'
raise AttributeError(msg)
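
A standalone numpy illustration of the two branches (not part of the diff):

```python
import numpy as np

# 'binary': a single sigmoid column, thresholded at 0.5
preds = np.array([[0.2], [0.8], [0.5]])
print(np.where(preds >= 0.5, 1, 0))   # [[0] [1] [1]]

# 'multi_label': one column per class, the highest probability wins
preds = np.array([[0.1, 0.7, 0.2]])
print(np.argmax(preds, 1))            # [1]
```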
5 changes: 4 additions & 1 deletion talos/scan/Scan.py
@@ -59,6 +59,8 @@ def model():
val_split : float, optional
The proportion of the input `x` which is set aside as the
validation data. (Default is 0.3).
multi_input : bool, optional
If the model takes multiple inputs, set to True. (Default is False).
# RANDOMNESS ARGUMENTS
----------------------
@@ -140,6 +142,7 @@ def __init__(self,
x_val=None,
y_val=None,
val_split=.3,
multi_input=False,
random_method='uniform_mersenne',
seed=None,
performance_target=None,
@@ -167,6 +170,7 @@
self.x_val = x_val
self.y_val = y_val
self.val_split = val_split
self.multi_input = multi_input

# randomness
self.random_method = random_method
@@ -195,7 +199,6 @@
self.clear_session = clear_session
self.save_weights = save_weights
self.save_models = save_models
# input parameters section ends

# start runtime
from .scan_run import scan_run
4 changes: 2 additions & 2 deletions talos/scan/scan_finish.py
@@ -45,12 +45,12 @@ def scan_finish(self):
try:
out['x_shape'] = self.x.shape
except AttributeError:
out['x_shape'] = 'list'
out['x_shape'] = 'multi-input'

try:
out['y_shape'] = self.y.shape
except AttributeError:
out['y_shape'] = 'list'
out['y_shape'] = 'multi-input'

# final cleanup
keys = list(self.__dict__.keys())
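
The try/except above works because a single numpy array exposes `.shape` while a multi-input list of arrays does not — a standalone illustration:

```python
import numpy as np

x_single = np.zeros((100, 4))
x_multi = [np.zeros((100, 4)), np.zeros((100, 2))]

print(x_single.shape)              # (100, 4) -> logged as the shape
print(hasattr(x_multi, 'shape'))   # False -> logged as 'multi-input'
```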
7 changes: 4 additions & 3 deletions talos/templates/models.py
@@ -103,9 +103,10 @@ def titanic(x_train, y_train, x_val, y_val, params):
metrics=['acc'])

# here we are also using the early_stopper function for a callback
out = model.fit(x_train, y_train,
out = model.fit(x=x_train,
y=y_train,
batch_size=params['batch_size'],
epochs=2,
epochs=params['epochs'],
verbose=0,
validation_data=(x_val, y_val))

@@ -128,7 +129,7 @@ def iris(x_train, y_train, x_val, y_val, params):
model.add(Dropout(params['dropout']))

# with this call we can create any number of hidden layers
hidden_layers(model, params, y_train.shape[1])
hidden_layers(model, params, x_train.shape[1])

# again, instead of the activation name, we have a dictionary entry
model.add(Dense(y_train.shape[1],
27 changes: 23 additions & 4 deletions talos/templates/params.py
@@ -1,15 +1,34 @@
def titanic():
def titanic(debug=False):

from tensorflow.keras.optimizers import Adam, Nadam

# here use a standard 2d dictionary for inputting the param boundaries
p = {'lr': (0.5, 5, 10),
'first_neuron': [4, 8, 16],
'batch_size': [20, 30, 40],
'dropout': (0, 0.5, 5),
'optimizer': ['Adam', 'Nadam'],
'optimizer': [Adam(), Nadam()],
'epochs': [50, 100, 150],
'losses': ['logcosh', 'binary_crossentropy'],
'shapes': ['brick', 'triangle', 0.2],
'hidden_layers': [0, 1, 2, 3, 4],
'activation': ['relu', 'elu'],
'last_activation': ['sigmoid']}

if debug:

p = {'lr': [0.1, 0.2],
'first_neuron': [4, 8],
'batch_size': [20, 30],
'dropout': [0.2, 0.3],
'optimizer': [Adam(), Nadam()],
'epochs': [50, 100],
'losses': ['logcosh', 'binary_crossentropy'],
'shapes': ['brick', 'triangle', 0.2],
'hidden_layers': [0, 1],
'activation': ['relu', 'elu'],
'last_activation': ['sigmoid']}

return p


@@ -24,10 +43,10 @@ def iris():
'first_neuron': [4, 8, 16, 32, 64],
'hidden_layers': [0, 1, 2, 3, 4],
'batch_size': (2, 30, 10),
'epochs': [2],
'epochs': [50, 100, 150],
'dropout': (0, 0.5, 5),
'weight_regulizer': [None],
'emb_output_dims': [None],
'emb_output_dims': [None],
'shapes': ['brick', 'triangle', 0.2],
'optimizer': [Adam, Nadam],
'losses': [logcosh, categorical_crossentropy],
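
A usage note for the new `debug` flag (a sketch; both calls assume only the template API shown above):

```python
import talos

p_full = talos.templates.params.titanic()             # full parameter boundaries
p_debug = talos.templates.params.titanic(debug=True)  # reduced space for quick smoke tests
```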
8 changes: 4 additions & 4 deletions talos/templates/pipelines.py
@@ -40,13 +40,13 @@ def iris(round_limit=2, random_method='uniform_mersenne'):
return scan_object


def titanic(round_limit=2, random_method='uniform_mersenne'):
def titanic(round_limit=2, random_method='uniform_mersenne', debug=False):

'''Performs a Scan with the Titanic dataset and a simple dense net'''
import talos as ta
scan_object = ta.Scan(ta.templates.datasets.titanic()[0][:50],
ta.templates.datasets.titanic()[1][:50],
ta.templates.params.titanic(),
scan_object = ta.Scan(ta.templates.datasets.titanic()[0],
ta.templates.datasets.titanic()[1],
ta.templates.params.titanic(debug),
ta.templates.models.titanic,
'test',
random_method=random_method,
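
With `debug` threaded through the pipeline, a quick smoke-test run looks like this (a sketch assuming the defaults above):

```python
import talos

# runs the whole titanic pipeline on the reduced debug parameter space
scan_object = talos.templates.pipelines.titanic(round_limit=2, debug=True)
```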
7 changes: 5 additions & 2 deletions talos/utils/recover_best_model.py
@@ -5,6 +5,7 @@ def recover_best_model(x_train,
experiment_log,
input_model,
metric,
multi_input=False,
x_cross=None,
y_cross=None,
n_models=5,
Expand All @@ -19,6 +20,7 @@ def recover_best_model(x_train,
experiment_log | str | path to the Talos experiment log
input_model | function | model used in the experiment
metric | str | use this metric to pick evaluation candidates
multi_input | bool | set to True if multi-input model
x_cross | array | data for the cross-validation, or None to use x_val
y_cross | array | data for the cross-validation, or None to use y_val
n_models | int | number of models to cross-validate
@@ -50,13 +52,14 @@
for i in range(n_models):

# get the params for the model and train it
params = df.sort_values(metric, ascending=False).drop(metric, 1).iloc[i].to_dict()
params = df.sort_values(metric, ascending=False)
params = params.drop(metric, 1).iloc[i].to_dict()
_history, model = input_model(x_train, y_train, x_val, y_val, params)

# start kfold cross-validation
out = []
folds = 5
kx, ky = kfold(x_cross, y_cross, folds, True)
kx, ky = kfold(x_cross, y_cross, folds, True, multi_input)

for i in range(folds):

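
A usage sketch based on the signature above (the log path, `iris_model`, and the unpacking of the return value are assumptions):

```python
from talos.utils.recover_best_model import recover_best_model

# cross-validate the five best models from an experiment log
results, models = recover_best_model(x_train, y_train,
                                     x_val, y_val,
                                     experiment_log='iris/experiment_log.csv',
                                     input_model=iris_model,
                                     metric='val_acc',
                                     multi_input=False,
                                     n_models=5)
```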
