Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add basic GPflow support #178

Merged
merged 2 commits into from Jun 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -539,3 +539,4 @@ MigrationBackup/
*.pkl
*.npy
*.joblib
dev/
477 changes: 477 additions & 0 deletions dev/play_w_gpflow.ipynb

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions docs/api.rst
Expand Up @@ -59,6 +59,14 @@ For quantile regression with LightGBM
:special-members:


For GPR with GPflow
.......................................

.. automodule:: pyepal.pal.pal_gpflowgpr
:members:
:show-inheritance:
:special-members:


Schedules for hyperparameter optimization
...........................................
Expand Down
2 changes: 2 additions & 0 deletions pyepal/__init__.py
Expand Up @@ -21,6 +21,7 @@
from .pal.pal_coregionalized import PALCoregionalized
from .pal.pal_finite_ensemble import PALJaxEnsemble
from .pal.pal_gbdt import PALGBDT
from .pal.pal_gpflowgpr import PALGPflowGPR
from .pal.pal_gpy import PALGPy
from .pal.pal_neural_tangent import PALNT
from .pal.pal_sklearn import PALSklearn
Expand All @@ -39,6 +40,7 @@
"PALCoregionalized",
"PALGBDT",
"PALGPy",
"PALGPflowGPR",
"PALSklearn",
"PALJaxEnsemble",
"PALNT",
Expand Down
130 changes: 130 additions & 0 deletions pyepal/pal/pal_gpflowgpr.py
@@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-
# Copyright 2020 PyePAL authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""PAL using GPy GPR models"""
import concurrent.futures
from functools import partial

import numpy as np

from .pal_base import PALBase
from .schedules import linear
from .validate_inputs import validate_njobs, validate_number_models

__all__ = ["PALGPflowGPR"]


def _train_model_picklable(i, models, opt, opt_kwargs):
    """Fit the ``i``-th entry of ``models`` and hand it back.

    Kept at module level so it can be wrapped with ``functools.partial``
    and mapped over the model indices.

    Args:
        i (int): Index into ``models`` of the model to train.
        models (list): Models exposing ``training_loss`` and
            ``trainable_variables``.
        opt: Optimizer exposing ``minimize(loss, variables, options=...)``.
        opt_kwargs (dict): Forwarded to the optimizer as ``options``.

    Returns:
        The same object as ``models[i]``, after ``opt.minimize`` was called on it.
    """
    print(f"training {i}")
    selected = models[i]
    # The optimizer's return value is not needed here.
    opt.minimize(
        selected.training_loss,
        selected.trainable_variables,
        options=opt_kwargs,
    )
    return selected


class PALGPflowGPR(PALBase):
    """PAL class for a list of GPflow GPR models, with one model per objective.

    Please consider that there are specific multioutput models
    (https://gpflow.readthedocs.io/en/master/notebooks/advanced/multioutput.html)
    for which the train and prediction function would need to be adjusted.
    You might also consider using streaming GPRs
    (https://github.com/thangbui/streaming_sparse_gp).
    In future releases we might support this case automatically
    (i.e., handle the case in which only one model is provided).
    """

    def __init__(self, *args, **kwargs):
        """Construct the PALGPflowGPR instance.

        Args:
            X_design (np.array): Design space (feature matrix)
            models (list): Machine learning models
            ndim (int): Number of objectives
            epsilon (Union[list, float], optional): Epsilon hyperparameter.
                Defaults to 0.01.
            delta (float, optional): Delta hyperparameter. Defaults to 0.05.
            beta_scale (float, optional): Scaling parameter for beta.
                If not equal to 1, the theoretical guarantees do not necessarily hold.
                Also note that the parametrization depends on the kernel type.
                Defaults to 1/9.
            goals (List[str], optional): If a list, provide "min" for every objective
                that shall be minimized and "max" for every objective
                that shall be maximized. Defaults to None, which means
                that the code maximizes all objectives.
            coef_var_threshold (float, optional): Use only points with
                a coefficient of variation below this threshold
                in the classification step. Defaults to 3.
            opt (optional): Optimizer object for the GPR parameters; it must
                expose ``minimize(loss, variables, options=...)``.
                If None (default), then we will use `gpflow.optimizers.Scipy()`
            opt_kwargs (dict, optional): Keyword arguments passed to the optimizer
                as ``options``. If None (default), PyePAL will pass `{"maxiter": 100}`
            n_jobs (int): Number of parallel threads that are used to fit
                the GPR models. Defaults to 1.
        """
        import gpflow  # pylint:disable=import-outside-toplevel

        self.n_jobs = validate_njobs(kwargs.pop("n_jobs", 1))

        # Treat an explicit ``None`` exactly like a missing argument, as the
        # docstring promises, and only build the default optimizer when it is
        # actually needed.
        opt = kwargs.pop("opt", None)
        self.opt = gpflow.optimizers.Scipy() if opt is None else opt
        opt_kwargs = kwargs.pop("opt_kwargs", None)
        self.opt_kwargs = {"maxiter": 100} if opt_kwargs is None else opt_kwargs

        # The remaining arguments are handled by the base class.
        super().__init__(*args, **kwargs)

        validate_number_models(self.models, self.ndim)

    def _set_data(self):
        """Replace the data of every model with the currently sampled points.

        Model ``i`` receives the rows of the design space selected by
        ``self.sampled[:, i]`` together with column ``i`` of ``self.y`` for the
        same rows, reshaped to a single column.
        """
        from gpflow.models.util import (  # pylint:disable=import-outside-toplevel
            data_input_to_tensor,
        )

        for i, model in enumerate(self.models):
            # NOTE(review): presumably ``self.sampled`` is a boolean mask
            # maintained by PALBase -- confirm.
            selected = self.sampled[:, i]
            model.data = data_input_to_tensor(
                (
                    self.design_space[selected],
                    self.y[selected, i].reshape(-1, 1),
                )
            )

    def _train(self):
        """Optimize every model, using up to ``self.n_jobs`` worker threads."""
        train_one = partial(
            _train_model_picklable,
            models=self.models,
            opt=self.opt,
            opt_kwargs=self.opt_kwargs,
        )
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=self.n_jobs,
        ) as executor:
            # ``executor.map`` yields results in input order, so the trained
            # models stay aligned with their objectives.
            trained = list(executor.map(train_one, range(self.ndim)))
        self.models = trained
        print("training done")

    def _predict(self):
        """Predict mean and standard deviation for the whole design space.

        Stores one column per model in ``self.means`` and ``self.std``.
        """
        means, stds = [], []
        for model in self.models:
            # ``predict_f`` returns the predictive mean and *variance*;
            # the square root converts the latter to a standard deviation.
            mean, var = model.predict_f(self.design_space)
            means.append(mean.numpy().reshape(-1, 1))
            stds.append(np.sqrt(var.numpy().reshape(-1, 1)))

        self.means = np.hstack(means)
        self.std = np.hstack(stds)

    def _set_hyperparameters(self):
        """Do nothing; hyperparameters are fitted in ``_train``."""

    def _should_optimize_hyperparameters(self) -> bool:
        """Return the value of the ``linear`` schedule for this iteration.

        NOTE(review): presumably true on every 10th iteration -- confirm
        against ``schedules.linear``.
        """
        return linear(self.iteration, 10)
7 changes: 6 additions & 1 deletion setup.py
Expand Up @@ -31,6 +31,7 @@
]
gbdt_requirements = ["lightgbm==3.*"]
neural_tangents_requirements = ["neural_tangents==0.*", "jaxlib==0.*"]
gpflow_requirements = ["gpflow"]
setup(
name="pyepal",
version=versioneer.get_version(),
Expand Down Expand Up @@ -62,7 +63,11 @@
"GPy": gpy_requirements,
"GBDT": gbdt_requirements,
"neural_tangents": neural_tangents_requirements,
"all": neural_tangents_requirements + gbdt_requirements + gpy_requirements,
"all": neural_tangents_requirements
+ gbdt_requirements
+ gpy_requirements
+ gpflow_requirements,
"gpflow": gpflow_requirements,
},
author="PyePAL authors",
author_email="kevin.jablonka@epfl.ch, brian.yoo@basf.com",
Expand Down
59 changes: 59 additions & 0 deletions tests/test_pal_gpflowgpr.py
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# Copyright 2020 PyePAL authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Testing the PALGPflowGPR class"""
import numpy as np

from pyepal.pal.pal_gpflowgpr import PALGPflowGPR


def test_pal_gpflow(binh_korn_points):
    """Run one PAL step of PALGPflowGPR with two GPR models on Binh-Korn data."""
    import gpflow  # pylint:disable=import-outside-toplevel

    features, targets = binh_korn_points
    # Standardize with the global mean/std of each array.
    features = (features - features.mean()) / features.std()
    # ``np.random.rand()`` without arguments is one scalar, so the same small
    # offset is added to every target value.
    targets = (targets - targets.mean()) / targets.std() + 0.01 * np.random.rand()

    def build_model(x, y):  # pylint:disable=invalid-name
        """Return a GPR model with a rational-quadratic kernel on ``(x, y)``."""
        return gpflow.models.GPR(
            data=(x, y),
            kernel=gpflow.kernels.RationalQuadratic(),
            mean_function=None,
        )

    initial_points = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 60, 70]
    sample_idx = np.array(initial_points)
    # One independently constructed model per objective.
    models = [
        build_model(features[sample_idx], targets[sample_idx]) for _ in range(2)
    ]

    palinstance = PALGPflowGPR(
        features,
        models,
        2,
        beta_scale=1,
        epsilon=0.01,
        delta=0.01,
        opt_kwargs={"maxiter": 50},
    )
    palinstance.cross_val_points = 0
    palinstance.update_train_set(sample_idx, targets[sample_idx])

    idx = palinstance.run_one_step()

    # The proposed point must be new, and nothing may be discarded yet.
    assert idx[0] not in initial_points
    assert palinstance.number_sampled_points > 0
    assert sum(palinstance.discarded) == 0