feat: add basic GPflow support (#178)
kjappelbaum committed Jun 15, 2021
1 parent 01d97a4 commit 69a8bec
Showing 7 changed files with 683 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
@@ -539,3 +539,4 @@ MigrationBackup/
*.pkl
*.npy
*.joblib
dev/
477 changes: 477 additions & 0 deletions dev/play_w_gpflow.ipynb

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions docs/api.rst
@@ -59,6 +59,14 @@ For quantile regression with LightGBM
:special-members:


For GPR with GPflow
.......................................

.. automodule:: pyepal.pal.pal_gpflowgpr
:members:
:show-inheritance:
:special-members:


Schedules for hyperparameter optimization
...........................................
2 changes: 2 additions & 0 deletions pyepal/__init__.py
@@ -21,6 +21,7 @@
from .pal.pal_coregionalized import PALCoregionalized
from .pal.pal_finite_ensemble import PALJaxEnsemble
from .pal.pal_gbdt import PALGBDT
from .pal.pal_gpflowgpr import PALGPflowGPR
from .pal.pal_gpy import PALGPy
from .pal.pal_neural_tangent import PALNT
from .pal.pal_sklearn import PALSklearn
@@ -39,6 +40,7 @@
"PALCoregionalized",
"PALGBDT",
"PALGPy",
"PALGPflowGPR",
"PALSklearn",
"PALJaxEnsemble",
"PALNT",
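
With this export in place, the new class becomes importable directly from the package root. A minimal sketch (it assumes PyePAL is installed together with its GPflow dependency):

from pyepal import PALGPflowGPR  # name re-exported via __all__ above
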
130 changes: 130 additions & 0 deletions pyepal/pal/pal_gpflowgpr.py
@@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-
# Copyright 2020 PyePAL authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""PAL using GPy GPR models"""
import concurrent.futures
from functools import partial

import numpy as np

from .pal_base import PALBase
from .schedules import linear
from .validate_inputs import validate_njobs, validate_number_models

__all__ = ["PALGPflowGPR"]


def _train_model_picklable(i, models, opt, opt_kwargs):
print(f"training {i}")
model = models[i]
_ = opt.minimize(model.training_loss, model.trainable_variables, options=opt_kwargs)
return model


class PALGPflowGPR(PALBase):
"""PAL class for a list of GPFlow GPR models, with one model per objective.
Please consider that there are specific multioutput models
(https://gpflow.readthedocs.io/en/master/notebooks/advanced/multioutput.html)
for which the train and prediction function would need to be adjusted.
You might also consider using streaming GPRs
(https://github.com/thangbui/streaming_sparse_gp).
In future releases we might support this case automatically
(i.e., handle the case in which only one model is provided).
"""

def __init__(self, *args, **kwargs):
"""Contruct the PALGPflowGPR instance
Args:
X_design (np.array): Design space (feature matrix)
models (list): Machine learning models
ndim (int): Number of objectives
epsilon (Union[list, float], optional): Epsilon hyperparameter.
Defaults to 0.01.
delta (float, optional): Delta hyperparameter. Defaults to 0.05.
beta_scale (float, optional): Scaling parameter for beta.
If not equal to 1, the theoretical guarantees do not necessarily hold.
Also note that the parametrization depends on the kernel type.
Defaults to 1/9.
goals (List[str], optional): If a list, provide "min" for every objective
that shall be minimized and "max" for every objective
that shall be maximized. Defaults to None, which means
that the code maximizes all objectives.
coef_var_threshold (float, optional): Use only points with
a coefficient of variation below this threshold
in the classification step. Defaults to 3.
opt (function, optional): Optimizer function for the GPR parameters.
If None (default), then we will use `gpflow.optimizers.Scipy()`.
opt_kwargs (dict, optional): Keyword arguments passed to the optimizer.
If None, PyePAL will pass `{"maxiter": 100}`.
n_jobs (int): Number of parallel threads that are used to fit
the GPR models. Defaults to 1.
"""
import gpflow # pylint:disable=import-outside-toplevel

self.n_jobs = validate_njobs(kwargs.pop("n_jobs", 1))
self.opt = kwargs.pop("opt", gpflow.optimizers.Scipy())
self.opt_kwargs = kwargs.pop("opt_kwargs", {"maxiter": 100})
super().__init__(*args, **kwargs)

validate_number_models(self.models, self.ndim)
# validate_gpy_model(self.models)

def _set_data(self):
from gpflow.models.util import ( # pylint:disable=import-outside-toplevel
data_input_to_tensor,
)

for i, model in enumerate(self.models):
model.data = data_input_to_tensor(
(
self.design_space[self.sampled[:, i]],
self.y[self.sampled[:, i], i].reshape(-1, 1),
)
)

def _train(self):
models = []
train_model_pickleable_partial = partial(
_train_model_picklable,
models=self.models,
opt=self.opt,
opt_kwargs=self.opt_kwargs,
)
with concurrent.futures.ThreadPoolExecutor(
max_workers=self.n_jobs,
) as executor:
for model in executor.map(train_model_pickleable_partial, range(self.ndim)):
models.append(model)
self.models = models
print("training done")

def _predict(self):
means, stds = [], []
for model in self.models:
# GPflow's predict_f returns the posterior mean and variance at the query points
mean, var = model.predict_f(self.design_space)
mean = mean.numpy()
var = var.numpy()
means.append(mean.reshape(-1, 1))
stds.append(np.sqrt(var.reshape(-1, 1)))

self.means = np.hstack(means)
self.std = np.hstack(stds)

def _set_hyperparameters(self):
pass

def _should_optimize_hyperparameters(self) -> bool:
return linear(self.iteration, 10)
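
For orientation, a minimal usage sketch of the module above (not part of the commit). The synthetic design space `X`, two-objective labels `y`, and initial `sample_idx` are placeholders, the Matern52 kernel is an arbitrary choice, and the constructor arguments mirror the docstring and the test below:

import gpflow
import numpy as np

from pyepal import PALGPflowGPR

# Synthetic stand-ins for a real design space and two objectives (placeholders)
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = np.hstack([np.sin(X[:, :1]), np.cos(X[:, 1:2])])
sample_idx = np.arange(10)


def build_gpr(x, y_col):
    # one single-output GPR per objective, matching the per-objective
    # reshape(-1, 1) convention used in _set_data above
    return gpflow.models.GPR(
        data=(x, y_col), kernel=gpflow.kernels.Matern52(), mean_function=None
    )


models = [build_gpr(X[sample_idx], y[sample_idx, i].reshape(-1, 1)) for i in range(2)]

palinstance = PALGPflowGPR(
    X,
    models,
    2,
    opt=gpflow.optimizers.Scipy(),  # the default optimizer, shown explicitly
    opt_kwargs={"maxiter": 200},  # forwarded as options= to opt.minimize
    n_jobs=2,  # fit the two GPR models in parallel threads
)
palinstance.cross_val_points = 0  # skip the cross-validation check, as in the test below
palinstance.update_train_set(sample_idx, y[sample_idx])
next_idx = palinstance.run_one_step()
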
7 changes: 6 additions & 1 deletion setup.py
@@ -31,6 +31,7 @@
]
gbdt_requirements = ["lightgbm==3.*"]
neural_tangents_requirements = ["neural_tangents==0.*", "jaxlib==0.*"]
gpflow_requirements = ["gpflow"]
setup(
name="pyepal",
version=versioneer.get_version(),
@@ -62,7 +63,11 @@
"GPy": gpy_requirements,
"GBDT": gbdt_requirements,
"neural_tangents": neural_tangents_requirements,
"all": neural_tangents_requirements + gbdt_requirements + gpy_requirements,
"all": neural_tangents_requirements
+ gbdt_requirements
+ gpy_requirements
+ gpflow_requirements,
"gpflow": gpflow_requirements,
},
author="PyePAL authors",
author_email="kevin.jablonka@epfl.ch, brian.yoo@basf.com",
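
With this extra registered, the GPflow backend should be installable on its own (e.g. `pip install pyepal[gpflow]`) or together with the other optional model libraries via the updated `all` extra (`pip install pyepal[all]`).
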
59 changes: 59 additions & 0 deletions tests/test_pal_gpflowgpr.py
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# Copyright 2020 PyePAL authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Testing the PALGPflowGPR class"""
import numpy as np

from pyepal.pal.pal_gpflowgpr import PALGPflowGPR


def test_pal_gpflow(binh_korn_points):
"""Test basic functionality of the PALGpy class"""
import gpflow # pylint:disable=import-outside-toplevel

X_binh_korn, y_binh_korn = binh_korn_points # pylint:disable=invalid-name
X_binh_korn = ( # pylint:disable=invalid-name
X_binh_korn - X_binh_korn.mean()
) / X_binh_korn.std() # pylint:disable=invalid-name
y_binh_korn = (
y_binh_korn - y_binh_korn.mean()
) / y_binh_korn.std() + 0.01 * np.random.rand()

def build_model(x, y): # pylint:disable=invalid-name
k = gpflow.kernels.RationalQuadratic()
m = gpflow.models.GPR( # pylint:disable=invalid-name
data=(x, y), kernel=k, mean_function=None
)
return m

sample_idx = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 60, 70])
model_0 = build_model(X_binh_korn[sample_idx], y_binh_korn[sample_idx])
model_1 = build_model(X_binh_korn[sample_idx], y_binh_korn[sample_idx])

palinstance = PALGPflowGPR(
X_binh_korn,
[model_0, model_1],
2,
beta_scale=1,
epsilon=0.01,
delta=0.01,
opt_kwargs={"maxiter": 50},
)
palinstance.cross_val_points = 0
palinstance.update_train_set(sample_idx, y_binh_korn[sample_idx])
idx = palinstance.run_one_step()
assert idx[0] not in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 60, 70]
assert palinstance.number_sampled_points > 0
assert sum(palinstance.discarded) == 0
