diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index b4ed1f3a..8d1a3d38 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -1,24 +1,38 @@ --- name: Bug Report -about: something needs fixing +about: I want to report something that is broken --- -Thanks so much for coming here to raise an issue. Please take a moment to 'check' the below boxes: +Thank you very much for reporting a bug on Talos. Before you do, please go through the below checklist carefully and make sure to prepare your bug report in a way that facilitates effective handling of the matter. -- [ ] I'm up-to-date with the latest release: - - pip install -U talos +#### 1) Confirm the below -- [ ] I've confirmed that my Keras model works outside of Talos. +- [ ] My Python version is 3.5 or higher +- [ ] I have searched through the issues [Issues](https://github.com/autonomio/talos/issues) for a duplicate +- [ ] I've tested that my Keras model works as a stand-alone -If you still have an error, please submit **complete trace** and a code with: +#### 2) Include the output of: -- output of shape for x and y e.g. (212,12) -- Talos params dictionary -- The Keras model wired for Talos -- Description of extra variables in the model +`talos.__version__` -You can provide the code in pastebin / gist or any other format you like. 
+#### 3) Explain clearly what you expect to happen + +*A description of what you tried to do and what you thought should happen.* + +#### 4) Explain what actually happened + +*A description of the issue in Talos that you had identified* + +#### 5) Provide a code-complete reference + +- [ ] My bug report includes an input model +- [ ] My bug report includes a parameter dictionary +- [ ] My bug report includes a `Scan()` command +- [ ] My bug report includes a link to a sample of the data + +NOTE: If the data is sensitive and can't be shared, [create dummy data](https://scikit-learn.org/stable/modules/classes.html#samples-generator) that mimics it. + +**A self-contained Jupyter Notebook, Google Colab, or similar is highly preferred and will speed up helping you with your issue.** ------------------------------------------------------------------------- diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 00000000..e32f51c9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,29 @@ +--- +name: Feature Request +about: I want to suggest a new feature + +--- + +Thanks a lot for suggesting a feature to Talos. Please take a moment to go through the below checklist to provide context in a way that makes it easy to take your request forward. + +#### 1) I think Talos should add... + +*A description of the feature with as much detail as you believe is valuable* + +#### 2) Once implemented, I can see how this feature will... + +*Explain how researchers will benefit from having this feature in Talos* + +#### 3) I believe this feature is... (choose one) + +- [ ] ...critically important +- [ ] ...must have +- [ ] ...nice to have + +#### 4) Given the chance, I'd be happy to make a PR for this feature... 
+ +- [ ] ...definitely +- [ ] ...possibly +- [ ] ...unlikely + +------------------------------------------------------------------------- diff --git a/.github/ISSUE_TEMPLATE/support-request.md b/.github/ISSUE_TEMPLATE/support-request.md new file mode 100644 index 00000000..f6a257ba --- /dev/null +++ b/.github/ISSUE_TEMPLATE/support-request.md @@ -0,0 +1,43 @@ +--- +name: Support +about: I want to ask for support + +--- + +First off, make sure to check your [support options](https://github.com/autonomio/talos#-how-to-get-support). + +The preferred way to resolve usage related matters is through the [docs](https://autonomio.github.io/talos/#/) which are maintained up-to-date with the latest version of Talos. + +If you do end up asking for support in a new issue, make sure to follow the below steps carefully. + +#### 1) Confirm the below + +- [ ] I have looked for an answer in the [Docs](https://autonomio.github.io/talos) +- [ ] My Python version is 3.5 or higher +- [ ] I have searched through the issues [Issues](https://github.com/autonomio/talos/issues) for a duplicate +- [ ] I've tested that my Keras model works as a stand-alone + +#### 2) Include the output of: + +`talos.__version__` + +#### 3) Explain clearly what you are trying to achieve + +*A description of your specific use-case and what you hope to achieve with it* + +#### 4) Explain what you have already tried + +*An outline of the steps that you have already taken so far* + +#### 5) Provide a code-complete reference + +- [ ] My support question includes an input model +- [ ] My support question includes a parameter dictionary +- [ ] My support question includes a `Scan()` command +- [ ] My support question includes a link to a sample of the data + +NOTE: If the data is sensitive and can't be shared, [create dummy data](https://scikit-learn.org/stable/modules/classes.html#samples-generator) that mimics it. 
+ +**A self-contained Jupyter Notebook, Google Colab, or similar is highly preferred and will speed up helping you with your issue.** + +------------------------------------------------------------------------- diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..ffbf40f5 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,14 @@ +## You want to make a PR to Talos + +Thanks so much :) First, please take a moment to carefully check through +the below items: + +- [ ] Changes have gone through actual use testing +- [ ] [Docs](https://autonomio.github.io/talos) are updated where relevant +- [ ] Code is [PEP8](https://www.python.org/dev/peps/pep-0008/) +- [ ] All local tests have passed (run ./test.sh in /talos) +- [ ] Travis tests have passed +- [ ] Open a pull request +- [ ] PR is to daily-dev branch + +
diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 03b5acd7..00000000 --- a/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,3 +0,0 @@ -- make sure that all tests are passed -- create some unit tests and update testing procedures accordingly -- make PR to dev (or daily-dev) diff --git a/docs/Examples_Generator.md b/docs/Examples_Generator.md index 5c8e20ee..118a4dfb 100644 --- a/docs/Examples_Generator.md +++ b/docs/Examples_Generator.md @@ -18,7 +18,7 @@ NOTE: In this example we will be using the `SequenceGenerator()` available in Ta ### Loading Data ```python -x, y = ta.templates.datasets.iris() +x_train, y_train, x_val, y_val = talos.templates.datasets.mnist() ``` `x` and `y` are expected to be either numpy arrays or lists of numpy arrays. @@ -28,7 +28,7 @@ x, y = ta.templates.datasets.iris() def mnist_model(x_train, y_train, x_val, y_val, params): model = Sequential() - model.add(Conv2D(32, kernel_size=(3, 3), activation=params['activation'], input_shape=input_shape)) + model.add(Conv2D(32, kernel_size=(3, 3), activation=params['activation'], input_shape=(28, 28, 1))) model.add(Flatten()) model.add(Dense(128, activation=params['activation'])) model.add(Dropout(params['dropout'])) @@ -41,11 +41,11 @@ def mnist_model(x_train, y_train, x_val, y_val, params): out = model.fit_generator(SequenceGenerator(x_train, y_train, batch_size=params['batch_size']), - epochs=params['epochs'], - validation_data=[x_val, y_val], - callbacks=[], - workers=4, - verbose=0) + epochs=params['epochs'], + validation_data=[x_val, y_val], + callbacks=[], + workers=4, + verbose=0) return out, model ``` diff --git a/docs/Examples_Generator_Code.md b/docs/Examples_Generator_Code.md index 7efdd5d7..d9fe72f2 100644 --- a/docs/Examples_Generator_Code.md +++ b/docs/Examples_Generator_Code.md @@ -14,7 +14,7 @@ x, y = ta.templates.datasets.iris() def mnist_model(x_train, y_train, x_val, y_val, params): model = Sequential() - model.add(Conv2D(32, 
kernel_size=(3, 3), activation=params['activation'], input_shape=input_shape)) + model.add(Conv2D(32, kernel_size=(3, 3), activation=params['activation'], input_shape=(28, 28, 1))) model.add(Flatten()) model.add(Dense(128, activation=params['activation'])) model.add(Dropout(params['dropout'])) @@ -27,11 +27,11 @@ def mnist_model(x_train, y_train, x_val, y_val, params): out = model.fit_generator(SequenceGenerator(x_train, y_train, batch_size=params['batch_size']), - epochs=params['epochs'], - validation_data=[x_val, y_val], - callbacks=[], - workers=4, - verbose=0) + epochs=params['epochs'], + validation_data=[x_val, y_val], + callbacks=[], + workers=4, + verbose=0) return out, model diff --git a/docs/Monitoring.md b/docs/Monitoring.md index 7bcde58e..7b185efa 100644 --- a/docs/Monitoring.md +++ b/docs/Monitoring.md @@ -28,5 +28,9 @@ Scan(print_params=True) Epoch-by-epoch training data is available during the experiment using the `ExperimentLogCallback`: ```python - +model.fit(... + callbacks=[talos.utils.ExperimentLogCallback('experiment_name', params)]) ``` +Here `params` is the params dictionary in the `Scan()` input model. 
Both +`experiment_name` and `experiment_id` should match with the current experiment, +as otherwise the callback cannot locate the log file of the running experiment. diff --git a/docs/Scan.md b/docs/Scan.md index 650ae1d3..1f4b7497 100644 --- a/docs/Scan.md +++ b/docs/Scan.md @@ -19,7 +19,7 @@ Argument | Input | Description `y` | array or list of arrays | prediction outcome variable `params` | dict | the parameter dictionary `model` | function | the Keras model as a function -`experiment_name` | str | Used for experiment log +`experiment_name` | str | Used for creating the experiment logging folder `x_val` | array or list of arrays | validation data for x `y_val` | array or list of arrays | validation data for y `val_split` | float | validation data split ratio @@ -38,7 +38,8 @@ Argument | Input | Description `minimize_loss` | bool | `reduction_metric` is a loss `disable_progress_bar` | bool | Disable live updating progress bar `print_params` | bool | Print each permutation hyperparameters -`clear_tf_session` | bool | Clear backend session between permutations +`clear_session` | bool | Clear backend session between permutations +`save_weights` | bool | Save model weights (increases memory pressure for large models) NOTE: `boolean_limit` will only work if its the last argument in `Scan()` and the following bracket is on a newline: diff --git a/docs/Templates.md b/docs/Templates.md index cd22a80b..febc43d8 100644 --- a/docs/Templates.md +++ b/docs/Templates.md @@ -30,6 +30,7 @@ talos.templates.datasets.breast_cancer() - telco_churn - titanic - iris +- mnist
diff --git a/setup.py b/setup.py index e1572632..79d2bc71 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ URL = 'http://autonom.io' LICENSE = 'MIT' DOWNLOAD_URL = 'https://github.com/autonomio/talos/' -VERSION = '0.6.2' +VERSION = '0.6.3' try: from setuptools import setup diff --git a/talos/__init__.py b/talos/__init__.py index a90e1732..c44bcaf8 100755 --- a/talos/__init__.py +++ b/talos/__init__.py @@ -23,7 +23,7 @@ templates.pipelines] keep_from_templates = ['iris', 'cervical_cancer', 'titanic', 'breast_cancer', - 'icu_mortality', 'telco_churn'] + 'icu_mortality', 'telco_churn', 'mnist'] for sub in template_sub: for key in list(sub.__dict__): @@ -34,4 +34,4 @@ del commands, scan, model, metrics, key del sub, keep_from_templates, template_sub, warnings -__version__ = "0.6.2" +__version__ = "0.6.3" diff --git a/talos/autom8/automodel.py b/talos/autom8/automodel.py index 27fe77a2..58daf6fc 100644 --- a/talos/autom8/automodel.py +++ b/talos/autom8/automodel.py @@ -1,6 +1,6 @@ class AutoModel: - def __init__(self, task, metric=None): + def __init__(self, task, experiment_name, metric=None): ''' @@ -19,12 +19,16 @@ def __init__(self, task, metric=None): If 'continuous' then mae is used for metric, if 'binary', 'multiclass', or 'multilabel', f1score is used. Accuracy is always used. + experiment_name | str | Must be same as in `Scan()` metric : None or list You can also input a list with one or more custom metrics or names of Keras or Talos metrics. 
''' + from talos.utils.experiment_log_callback import ExperimentLogCallback + self.task = task + self.experiment_name = experiment_name self.metric = metric if self.task is not None: @@ -36,6 +40,7 @@ def __init__(self, task, metric=None): # create the model self.model = self._create_input_model + self.callback = ExperimentLogCallback def _set_metric(self): @@ -79,7 +84,8 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): elif params['network'] == 'dense': model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1], - activation='relu')) + activation='relu', + kernel_initializer=params['kernel_initializer'])) model.add(Dropout(params['dropout'])) @@ -95,7 +101,8 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): y_val) model.add(Dense(last_neuron, - activation=activation)) + activation=activation, + kernel_initializer=params['kernel_initializer'])) # bundle the optimizer with learning rate changes from talos.model.normalizers import lr_normalizer @@ -112,6 +119,7 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): batch_size=params['batch_size'], epochs=params['epochs'], verbose=0, + callbacks=[self.callback(self.experiment_name, params)], validation_data=[x_val, y_val]) # pass the output to Talos diff --git a/talos/autom8/autoparams.py b/talos/autom8/autoparams.py index aff1a77d..4d5ab7c4 100644 --- a/talos/autom8/autoparams.py +++ b/talos/autom8/autoparams.py @@ -46,9 +46,9 @@ def __init__(self, self.params = {} else: self.params = params - if auto: self._automated() + self.resample_params(4) def _automated(self, shapes='fixed'): @@ -77,7 +77,7 @@ def _automated(self, shapes='fixed'): if self._network: self.networks() else: - self.params['network'] = 'dense' + self.params['network'] = ['dense'] self.last_activations() def shapes(self, shapes='auto'): @@ -186,7 +186,9 @@ def kernel_initializers(self, kernel_inits='auto'): if kernel_inits == 'auto': self._append_params('kernel_initializer', 
- ['glorot_uniform', 'glorot_normal', + ['uniform', 'normal', 'he_normal', + 'he_uniform', 'lecun_normal', + 'glorot_uniform', 'glorot_normal', 'random_uniform', 'random_normal']) else: self._append_params('kernel_initializer', kernel_inits) diff --git a/talos/autom8/autoscan.py b/talos/autom8/autoscan.py index d7eef656..6cc4ad25 100644 --- a/talos/autom8/autoscan.py +++ b/talos/autom8/autoscan.py @@ -2,7 +2,8 @@ class AutoScan: def __init__(self, task, - max_param_values): + experiment_name, + max_param_values=None): '''Configure the `AutoScan()` experiment and then use the property `start` in the returned class object to start @@ -16,6 +17,7 @@ def __init__(self, self.task = task self.max_param_values = max_param_values + self.experiment_name = experiment_name def start(self, x, y, **kwargs): @@ -30,15 +32,25 @@ def start(self, x, y, **kwargs): import talos - m = talos.autom8.AutoModel(self.task).model + m = talos.autom8.AutoModel(self.task, self.experiment_name).model try: kwargs['params'] - scan_object = talos.Scan(x, y, model=m, **kwargs) + scan_object = talos.Scan(x, y, + model=m, + experiment_name=self.experiment_name, + **kwargs) except KeyError: p = talos.autom8.AutoParams(task=self.task) - p.resample_params(self.max_param_values) + + if self.max_param_values is not None: + p.resample_params(self.max_param_values) params = p.params - scan_object = talos.Scan(x, y, params, m, **kwargs) + scan_object = talos.Scan(x=x, + y=y, + params=params, + model=m, + experiment_name=self.experiment_name, + **kwargs) return scan_object diff --git a/talos/logging/results.py b/talos/logging/results.py index df6a002c..71f86946 100644 --- a/talos/logging/results.py +++ b/talos/logging/results.py @@ -30,7 +30,7 @@ def save_result(self): import numpy as np - np.savetxt(self.experiment_name + '.csv', + np.savetxt(self._experiment_log, self.result, fmt='%s', delimiter=',') diff --git a/talos/reducers/GamifyMap.py b/talos/reducers/GamifyMap.py new file mode 100644 index 
class GamifyMap:

    '''GamifyMap handles the management of the dictionary that contains
    the information about hyperparameters, which is exchanged in and out
    (as a .json file) during the `Scan()` experiment.

    Two index-keyed dictionaries are kept:

    gamify_dict | dict | {'<param index>': {'<value index>':
                         ['active', 0, str(label), str(value)]}}; this is
                         what gets written to the .json file
    gamify_map  | dict | same keys, but holding [label, value] in their
                         original (non-stringified) forms
    '''

    def __init__(self, scan_object):

        '''scan_object | object | must provide `param_object.params`,
                                  `experiment_name` and `_experiment_id`
        '''

        self.params = scan_object.param_object.params
        self.scan_object = scan_object
        self.generate_gamify_dict()
        self.gamify_map = self.generate_gamify_dict_map()

        # parse together the output file name (extension is added on use)
        _folder = './' + scan_object.experiment_name + '/'
        self._filename = _folder + scan_object._experiment_id

    def run_updates(self):

        '''Compares the dictionary read with `import_json()` against the
        current one, and for every entry that differs calls `remove_is`
        on the parameter object with the corresponding label and value.

        NOTE: `import_json()` has to be called first, as it is what
        sets `updated_dict`.

        Returns the scan object.'''

        for key, entries in self.gamify_dict.items():
            for val in entries:

                updated = self.updated_dict[key][val]

                if entries[val] != updated:

                    # resolve through the map built in __init__ so that
                    # the lookup does not depend on the current state of
                    # the params dictionary
                    label, value = self.gamify_map[key][val]

                    entries[val] = updated
                    self.scan_object.param_object.remove_is(label, value)

        return self.scan_object

    def back_to_original(self, gamify_from_json):

        '''Converts an index-keyed dictionary (as stored in the .json)
        back to {label: {value: [first two fields of the entry]}} using
        the original labels and values.

        gamify_from_json | dict | dictionary in the `gamify_dict` format
        '''

        gamify_dict = {}

        for key, entries in gamify_from_json.items():

            param_vals = {}
            label = None

            for val, state in entries.items():
                label, value = self.gamify_map[key][val]
                param_vals[value] = state[:2]

            # a parameter without any entries has no label to restore
            if label is not None:
                gamify_dict[label] = param_vals

        return gamify_dict

    def generate_gamify_dict_map(self):

        '''Returns the index mapping that is used for converting the
        stringified entries back to actual labels and values.'''

        gamify_dict_map = {}

        for i, key in enumerate(self.params.keys()):
            gamify_dict_map[str(i)] = {
                str(ii): [key, val]
                for ii, val in enumerate(self.params[key])}

        return gamify_dict_map

    def generate_gamify_dict(self):

        '''This is done once at the beginning
        of the experiment.

        NOTE: This will be all stringified, so an index
        mapping system will be used to convert back to
        actual forms later.'''

        gamify_dict = {}

        for i, key in enumerate(self.params.keys()):
            gamify_dict[str(i)] = {
                str(ii): ['active', 0, str(key), str(val)]
                for ii, val in enumerate(self.params[key])}

        self.gamify_dict = gamify_dict

    def export_json(self):

        '''Writes `gamify_dict` to <experiment folder>/<id>.json'''

        import json

        with open(self._filename + '.json', 'w') as fp:
            json.dump(self.gamify_dict, fp)

    def import_json(self):

        '''Reads <experiment folder>/<id>.json into `updated_dict`'''

        import json

        with open(self._filename + '.json', 'r') as fp:
            out = json.load(fp)

        self.updated_dict = out
def gamify(self):

    '''Will apply reduction changes based on edits on the
    produced .json file in the experiment folder.

    On the first call the `GamifyMap` object is created, stored in the
    scan object as `_gamify_object`, and the first .json is exported.
    On every later call the .json is read back in, the changes found in
    it are applied, and the file is written out again.

    Returns the scan object.'''

    first_round = self.param_object.round_counter == 1

    # also initialize when there is no object yet, so that a first call
    # on some other round does not fail on the missing attribute
    if first_round or getattr(self, '_gamify_object', None) is None:

        # create the gamify object
        from .GamifyMap import GamifyMap
        gamify_object = GamifyMap(self)

        # keep in scan_object
        self._gamify_object = gamify_object

        # do the first export in the experiment folder
        gamify_object.export_json()

        return self

    # for every round check if there are changes
    self._gamify_object.import_json()
    self = self._gamify_object.run_updates()
    self._gamify_object.export_json()

    return self
def trees(self, quantile=.8):

    '''Extra Trees based reduction strategy. Like 'forrest', somewhat more
    aggressive than for example 'spearman' because there are no
    negative values, but instead the highest positive correlation
    is minused from all the values so that max value is 0, and then
    values are turned into positive. The one with the highest positive
    score in the end will be dropped. This means that anything with
    0 originally, is a candidate for dropping. Because there are multiple
    zeroes in many cases, there is an element of randomness on which one
    is dropped.

    quantile | float | rounds with `reduction_metric` above this quantile
                       are labeled True, and the rest False

    Returns the scan object, in the same way as the other reducers that
    are called as `self = reducer(self)` in `reduce_run()`.

    '''

    import wrangle
    import numpy as np

    # handle conversion to multi_labels
    from .reduce_utils import cols_to_multilabel
    data = cols_to_multilabel(self)

    # because extra trees wants label as 'y' we first transform with quantile
    quantile_value = data[self.reduction_metric].quantile(quantile)
    data[self.reduction_metric] = data[self.reduction_metric] > quantile_value

    # get the correlations
    corr_values = wrangle.df_corr_extratrees(data, self.reduction_metric)

    # drop labels where value is NaN
    corr_values.dropna(inplace=True)

    # with nothing left to rank, leave the parameter space as it is
    if len(corr_values) == 0:
        return self

    # handle the turning around of values (see docstring for more info)
    # NOTE(review): assumes the first item is the highest score, i.e. that
    # wrangle returns the values in descending order - confirm
    corr_values -= corr_values[0]
    corr_values = corr_values.abs()

    # get the column header of the last item
    column = corr_values.index[-1]

    # get the label, dtype, and value from the column header
    label, dtype, value = column.split('~')

    # convert things back to their original dtype
    value = np.array([value]).astype(dtype)[0]

    # this is where we modify the parameter space accordingly
    self.param_object.remove_is(label, value)

    return self
x_val : ndarray User specified cross-validation data. (Default is None). y_val : ndarray @@ -120,12 +121,20 @@ def model(): ----------------- clear_session : bool If the backend session is cleared between every permutation. + save_weights : bool + If set to False, then model weights will not be saved and best_model + and some other features will not work. Will reduce memory pressure + on very large models and high number of rounds/permutations. """ global self - def __init__(self, x, y, params, model, - experiment_name=None, + def __init__(self, + x, + y, + params, + model, + experiment_name, x_val=None, y_val=None, val_split=.3, @@ -144,7 +153,8 @@ def __init__(self, x, y, params, model, minimize_loss=False, disable_progress_bar=False, print_params=False, - clear_session=True,): + clear_session=True, + save_weights=True): self.x = x self.y = y @@ -178,13 +188,14 @@ def __init__(self, x, y, params, model, self.disable_progress_bar = disable_progress_bar self.print_params = print_params - # other + # performance self.clear_session = clear_session + self.save_weights = save_weights # input parameters section ends - self.runtime() + self._runtime() - def runtime(self): + def _runtime(self): from .scan_run import scan_run self = scan_run(self) diff --git a/talos/scan/scan_prepare.py b/talos/scan/scan_prepare.py index 8be14a84..0ec17f89 100644 --- a/talos/scan/scan_prepare.py +++ b/talos/scan/scan_prepare.py @@ -3,11 +3,9 @@ def scan_prepare(self): '''Includes all preparation procedures up until starting the first scan through scan_run()''' - import time as ti + from .scan_utils import initialize_log - # create the name for the experiment - if self.experiment_name is None: - self.experiment_name = ti.strftime('%D%H%M%S').replace('/', '') + self._experiment_log = initialize_log(self) # for the case where x_val or y_val is missing when other is present self.custom_val_split = False diff --git a/talos/scan/scan_round.py b/talos/scan/scan_round.py index 073e926c..212c2147 
def initialize_log(self):

    '''Creates the experiment folder (unless one is already there) in the
    current working directory, and an empty .csv experiment log in it.

    self | object | must provide `experiment_name`; `_experiment_id`
                    is set here as a side-effect

    Returns the path of the experiment log as
    './<experiment_name>/<experiment_id>.csv'
    '''

    import time
    import os

    # create the experiment folder (unless one is already there)
    os.makedirs(os.path.join(os.getcwd(), self.experiment_name),
                exist_ok=True)

    # same output as '%D%H%M%S' with the slashes removed, but without
    # relying on the platform specific '%D' directive
    self._experiment_id = time.strftime('%m%d%y%H%M%S')
    _file_name = self._experiment_id + '.csv'
    _experiment_log = './' + self.experiment_name + '/' + _file_name

    # create (or empty) the log file
    with open(_experiment_log, 'w'):
        pass

    return _experiment_log
ta.templates.datasets.breast_cancer()[1], ta.templates.params.breast_cancer(), ta.templates.models.breast_cancer, + 'test', round_limit=round_limit) return scan_object @@ -19,6 +20,7 @@ def cervical_cancer(round_limit=2, random_method='uniform_mersenne'): ta.templates.datasets.cervical_cancer()[1], ta.templates.params.cervical_cancer(), ta.templates.models.cervical_cancer, + 'test', round_limit=round_limit) return scan_object @@ -32,6 +34,7 @@ def iris(round_limit=2, random_method='uniform_mersenne'): ta.templates.datasets.iris()[1], ta.templates.params.iris(), ta.templates.models.iris, + 'test', round_limit=round_limit) return scan_object @@ -45,6 +48,7 @@ def titanic(round_limit=2, random_method='uniform_mersenne'): ta.templates.datasets.titanic()[1][:50], ta.templates.params.titanic(), ta.templates.models.titanic, + 'test', round_limit=round_limit) return scan_object diff --git a/talos/utils/__init__.py b/talos/utils/__init__.py index bb07271d..7f2ae802 100644 --- a/talos/utils/__init__.py +++ b/talos/utils/__init__.py @@ -10,5 +10,6 @@ import talos.metrics.keras_metrics as metrics from .sequence_generator import SequenceGenerator from .experiment_log_callback import ExperimentLogCallback +from .torch_history import TorchHistory del experiment_log_callback, sequence_generator diff --git a/talos/utils/experiment_log_callback.py b/talos/utils/experiment_log_callback.py index 02c01b40..c878aa36 100644 --- a/talos/utils/experiment_log_callback.py +++ b/talos/utils/experiment_log_callback.py @@ -3,15 +3,33 @@ class ExperimentLogCallback(Callback): - def __init__(self, name, params): + def __init__(self, + experiment_name, + params): '''Takes as input the name of the experiment which will be used for creating a .log file with the outputs and the params - dictionary from the input model in `Scan()`''' + dictionary from the input model in `Scan()` + + experiment_name | str | must match the experiment_name in `Scan()` + + ''' super(ExperimentLogCallback, self).__init__() 
- self.name = name + import glob + import os + + # get the experiment id first + list_of_files = glob.glob('./' + experiment_name + '/*.csv') + try: + latest_file = max(list_of_files, key=os.path.getmtime) + except ValueError: + print("`experiment_name` has to match `Scan(experiment_name)`") + + self.name = latest_file.replace('.csv', '') + '.log' + + # rest of the config variables self.params = params self.counter = 1 self.new_file = True @@ -24,7 +42,7 @@ def on_train_begin(self, logs={}): def on_train_end(self, logs={}): - f = open(self.name + '.log', 'a+') + f = open(self.name, 'a+') [f.write(','.join(map(str, i)) + '\n') for i in self.final_out] f.close() @@ -37,7 +55,7 @@ def on_epoch_end(self, epoch, logs={}): if len(self.final_out) == 0: try: - open(self.name + '.log', 'r') + open(self.name, 'r') except FileNotFoundError: self.epoch_out.append('id') diff --git a/talos/utils/torch_history.py b/talos/utils/torch_history.py new file mode 100644 index 00000000..9dc358d3 --- /dev/null +++ b/talos/utils/torch_history.py @@ -0,0 +1,14 @@ +class TorchHistory: + + def __init__(self): + + self.history = {'loss': []} + + def append(self, tensor): + + '''Takes in a tensor for loss or other criterion + from PyTorch and stores a python scalar in the history + object. This is for unifying the `Scan()` API between + Keras and PyTorch''' + + self.history['loss'].append(tensor.data.item()) diff --git a/test/commands/test_analyze.py b/test/commands/test_analyze.py index 1061a815..f670dc1f 100644 --- a/test/commands/test_analyze.py +++ b/test/commands/test_analyze.py @@ -1,16 +1,19 @@ def test_analyze(scan_object): - import talos - '''Tests all the attributes available in the Reporting() object''' print('\n >>> Start Analyze()... 
\n') + import talos + import glob + # for now test with old name r = talos.Reporting(scan_object) # read from file - r = talos.Reporting('test.csv') + list_of_files = glob.glob('./testing_latest/' + '/*.csv') + + r = talos.Reporting(list_of_files[-1]) # and then from scan object r = talos.Analyze(scan_object) diff --git a/test/commands/test_autom8.py b/test/commands/test_autom8.py index 4afd4864..6a3716a6 100644 --- a/test/commands/test_autom8.py +++ b/test/commands/test_autom8.py @@ -37,25 +37,25 @@ def test_autom8(): x, y = wrangle.utils.create_synth_data('binary', 50, 10, 1) p.losses(['binary_crossentropy']) - auto = talos.autom8.AutoScan('binary', 1) + auto = talos.autom8.AutoScan('binary', 'testinga', 1) scan_object = auto.start(x, y, params=p.params) talos.autom8.AutoPredict(scan_object, x, y, x, 'binary') x, y = wrangle.utils.create_synth_data('multi_label', 50, 10, 4) p.losses(['categorical_crossentropy']) - auto = talos.autom8.AutoScan('multi_label', 1) + auto = talos.autom8.AutoScan('multi_label', 'testingb', 1) auto.start(x, y, params=p.params) talos.autom8.AutoPredict(scan_object, x, y, x, 'multi_label') x, y = wrangle.utils.create_synth_data('multi_class', 50, 10, 3) p.losses(['sparse_categorical_crossentropy']) - auto = talos.autom8.AutoScan('multi_class', 1) + auto = talos.autom8.AutoScan('multi_class', 'testingc', 1) auto.start(x, y, params=p.params) talos.autom8.AutoPredict(scan_object, x, y, x, 'multi_class') - x, y = wrangle.utils.create_synth_data('regression', 50, 10, 1) + x, y = wrangle.utils.create_synth_data('continuous', 50, 10, 1) p.losses(['mae']) - auto = talos.autom8.AutoScan('continuous', 1) + auto = talos.autom8.AutoScan('continuous', 'testingd', 1) auto.start(x, y, params=p.params) talos.autom8.AutoPredict(scan_object, x, y, x, 'continuous') diff --git a/test/commands/test_latest.py b/test/commands/test_latest.py index bdd9c782..399c2855 100644 --- a/test/commands/test_latest.py +++ b/test/commands/test_latest.py @@ -33,7 +33,7 @@ 
def iris_model(x_train, y_train, x_val, y_val, params): out = model.fit(x_train, y_train, - callbacks=[talos.utils.ExperimentLogCallback('minimal_iris', params)], + callbacks=[talos.utils.ExperimentLogCallback('testing_latest', params)], batch_size=params['batch_size'], epochs=params['epochs'], validation_data=[x_val, y_val], @@ -44,6 +44,9 @@ def iris_model(x_train, y_train, x_val, y_val, params): scan_object = talos.Scan(x, y, model=iris_model, params=p, - round_limit=5) + experiment_name='testing_latest', + round_limit=5, + reduction_method='gamify', + save_weights=False) print('finised Latest Features \n') diff --git a/test/commands/test_rest.py b/test/commands/test_rest.py index 3c140b2b..921e5af8 100644 --- a/test/commands/test_rest.py +++ b/test/commands/test_rest.py @@ -5,13 +5,11 @@ def test_rest(scan_object): import talos import random - name = str(hash(random.random())) - print('\n ...Deploy()... \n') - talos.Deploy(scan_object, name, 'val_acc') + talos.Deploy(scan_object, 'testing_deploy', 'val_acc') print('\n ...Restore()... 
\n') - talos.Restore(name + '.zip') + talos.Restore('testing_deploy' + '.zip') x, y = talos.templates.datasets.breast_cancer() x = x[:50] diff --git a/test/commands/test_scan.py b/test/commands/test_scan.py index a87bba4a..6c1ad7a7 100644 --- a/test/commands/test_scan.py +++ b/test/commands/test_scan.py @@ -53,7 +53,7 @@ def iris_model(x_train, y_train, x_val, y_val, params): y=y, params=p, model=iris_model, - experiment_name='test', + experiment_name='testingq', val_split=0.3, random_method='uniform_mersenne', round_limit=15, @@ -86,7 +86,7 @@ def iris_model(x_train, y_train, x_val, y_val, params): y=y, params=p, model=iris_model, - experiment_name=None, + experiment_name="testing2", x_val=x, y_val=y, random_method='latin_suduko', @@ -101,7 +101,7 @@ def iris_model(x_train, y_train, x_val, y_val, params): y=y, params=p, model=iris_model, - experiment_name=None, + experiment_name="testing3", x_val=None, y_val=None, val_split=0.3, diff --git a/test/commands/test_templates.py b/test/commands/test_templates.py index e0bd1193..ab9657a4 100644 --- a/test/commands/test_templates.py +++ b/test/commands/test_templates.py @@ -9,28 +9,28 @@ def test_templates(): y = y[:50] model = talos.templates.models.titanic p = talos.templates.params.titanic() - talos.Scan(x, y, p, model, round_limit=2) + talos.Scan(x, y, p, model, 'test', round_limit=2) x, y = talos.templates.datasets.iris() x = x[:50] y = y[:50] model = talos.templates.models.iris p = talos.templates.params.iris() - talos.Scan(x, y, p, model, round_limit=2) + talos.Scan(x, y, p, model, 'test', round_limit=2) x, y = talos.templates.datasets.cervical_cancer() x = x[:50] y = y[:50] model = talos.templates.models.cervical_cancer p = talos.templates.params.cervical_cancer() - talos.Scan(x, y, p, model, round_limit=2) + talos.Scan(x, y, p, model, 'test', round_limit=2) x, y = talos.templates.datasets.breast_cancer() x = x[:50] y = y[:50] model = talos.templates.models.breast_cancer p = talos.templates.params.breast_cancer() 
- talos.Scan(x, y, p, model, round_limit=2) + talos.Scan(x, y, p, model, 'test', round_limit=2) x, y = talos.templates.datasets.icu_mortality(50) x, y = talos.templates.datasets.telco_churn(.3) diff --git a/test/performance/memory_pressure.py b/test/performance/memory_pressure.py new file mode 100644 index 00000000..23019d85 --- /dev/null +++ b/test/performance/memory_pressure.py @@ -0,0 +1,59 @@ +import talos +from talos.utils import SequenceGenerator + +from keras.datasets import mnist +from keras.models import Sequential +from keras.layers import Dense, Dropout, Flatten +from keras.layers import Conv2D, MaxPooling2D +from keras.utils import Sequence +import numpy as np + +p = {'activation':['relu'], + 'optimizer': ['AdaDelta'], + 'losses': ['categorical_crossentropy'], + 'dropout': [.2], + 'batch_size': [256], + 'epochs': [1, 1, 1, 1, 1]} + +x_train, y_train, x_val, y_val = talos.templates.datasets.mnist() + +@profile +def talos_version(): + + def mnist_model(x_train, y_train, x_val, y_val, params): + + model = Sequential() + model.add(Conv2D(32, kernel_size=(3, 3), activation=params['activation'], input_shape=(28, 28, 1))) + model.add(Flatten()) + model.add(Dense(128, activation=params['activation'])) + model.add(Dropout(params['dropout'])) + model.add(Dense(10, activation='softmax')) + + model.compile(optimizer=params['optimizer'], + loss=params['losses'], + metrics=['acc', talos.utils.metrics.f1score]) + + out = model.fit_generator(SequenceGenerator(x_train, + y_train, + batch_size=params['batch_size']), + epochs=params['epochs'], + validation_data=[x_val, y_val], + callbacks=[], + workers=4, + verbose=0) + + return out, model + + scan_object = talos.Scan(x=x_train, + y=y_train, + x_val=x_val, + y_val=y_val, + params=p, + model=mnist_model, + experiment_name='mnist', + save_weights=False) + + +if __name__ == "__main__": + + talos_version() diff --git a/test/performance/memory_pressure_check.py b/test/performance/memory_pressure_check.py new file mode 100644 
index 00000000..02780e01 --- /dev/null +++ b/test/performance/memory_pressure_check.py @@ -0,0 +1,28 @@ +if __name__ == '__main__': + + import numpy as np + import pandas as pd + import os + + print('\n Memory Pressure Test Starts...\n') + + for i in os.listdir(): + if 'mprofile_' in i: + df = pd.read_csv(i, sep=' ', error_bad_lines=False) + + df.columns = ['null', 'memory', 'time'] + df.drop('null', 1, inplace=True) + + std_limit = 5 + highest_limit = 800 + + std = np.std(np.array(df.memory.values[1500:])) + highest = df.memory.max() + + if std > std_limit: + raise Exception('MEMORY TEST FAILED: Standard deviation of memory pressure is %d which is above the %d limit' % (std, std_limit)) + + if highest > highest_limit: + raise Exception('MEMORY TEST FAILED: Max memory is %d which is above the %d limit' % (highest, highest_limit)) + + print("\n Memory Pressure Test Passed \n") diff --git a/test_local.sh b/test_local.sh new file mode 100755 index 00000000..bd0984c9 --- /dev/null +++ b/test_local.sh @@ -0,0 +1,14 @@ +export MPLBACKEND=agg + +mprof run ./test/performance/memory_pressure.py +python ./test/performance/memory_pressure_check.py +python test_script.py +rm *.zip +rm *.csv +rm -rf 15* +rm ./test/*.csv +rm -rf testing* +rm -rf test_latest +rm mprofile_*.dat +rm -rf iris_test +rm ./test/*.log