# Source code for adaptive.learner.skopt_learner

from __future__ import annotations

import collections
import collections.abc
from typing import TYPE_CHECKING

import numpy as np
from skopt import Optimizer

from adaptive.learner.base_learner import BaseLearner
from adaptive.notebook_integration import ensure_holoviews
from adaptive.utils import cache_latest

if TYPE_CHECKING:
    import pandas


class SKOptLearner(Optimizer, BaseLearner):
    """Learn a function minimum using ``skopt.Optimizer``.

    This is an ``Optimizer`` from ``scikit-optimize``,
    with the necessary methods added to make it conform
    to the ``adaptive`` learner interface.

    Parameters
    ----------
    function : callable
        The function to learn.
    **kwargs :
        Arguments to pass to ``skopt.Optimizer``.
    """

    def __init__(self, function, **kwargs):
        self.function = function
        self.pending_points = set()
        self.data = collections.OrderedDict()
        # Kept so that `new` can build an identically configured learner.
        self._kwargs = kwargs
        super().__init__(**kwargs)

    @staticmethod
    def _point_key(x):
        """Return the hashable key used for point ``x``.

        Iterable points are converted to tuples; scalar points are
        used as they are. ``tell`` and ``tell_pending`` share this so
        that both store a point under the same key.
        """
        if isinstance(x, collections.abc.Iterable):
            return tuple(x)
        return x

    def new(self) -> SKOptLearner:
        """Return a new `~adaptive.SKOptLearner` without the data."""
        return SKOptLearner(self.function, **self._kwargs)

    def tell(self, x, y, fit=True):
        """Record the value ``y`` for the point ``x``.

        Parameters
        ----------
        x : scalar or iterable
            The point. A non-iterable point is wrapped in a
            one-element list before it is handed to the optimizer.
        y : object
            The value at ``x``; stored in ``self.data``.
        fit : bool, optional
            Forwarded unchanged to the parent ``tell``, by default True.
        """
        key = self._point_key(x)
        self.pending_points.discard(key)
        self.data[key] = y
        if isinstance(x, collections.abc.Iterable):
            super().tell(x, y, fit)
        else:
            super().tell([x], y, fit)

    def tell_pending(self, x):
        """Mark the point ``x`` as requested but not yet evaluated.

        Accepts the same scalar or iterable points as ``tell``.
        """
        # 'skopt.Optimizer' takes care of points we
        # have not got results for.
        self.pending_points.add(self._point_key(x))

    def remove_unfinished(self):
        """Do nothing; this learner keeps no state that needs clearing here."""
        pass

    @cache_latest
    def loss(self, real=True):
        """Return the in-sample error of the most recent model.

        Parameters
        ----------
        real : bool, optional
            Not used by this learner, by default True.

        Returns
        -------
        float
            ``numpy.inf`` while no model is available, otherwise
            ``1 - model.score(self.Xi, self.yi)`` for the last model.
        """
        if not self.models:
            return np.inf
        model = self.models[-1]
        # Return the in-sample error (i.e. test the model
        # with the training data). This is not the best
        # estimator of loss, but it is the cheapest.
        # NOTE(review): `plot` passes points through `self.space.transform`
        # before calling the model, whereas the raw `self.Xi` is scored
        # here -- confirm which form the model expects.
        return 1 - model.score(self.Xi, self.yi)

    def ask(self, n, tell_pending=True):
        """Request ``n`` new points from the optimizer.

        Parameters
        ----------
        n : int
            The number of points to request.
        tell_pending : bool, optional
            Must be True, by default True.

        Returns
        -------
        points : list
            The requested points. For a one-dimensional space the
            single coordinate of each point is returned.
        loss_improvements : list of float
            ``n`` copies of ``self.loss() / n``.

        Raises
        ------
        NotImplementedError
            If ``tell_pending`` is False.
        """
        if not tell_pending:
            raise NotImplementedError(
                "Asking points is an irreversible "
                "action, so use `ask(n, tell_pending=True)`."
            )
        points = super().ask(n)
        # TODO: Choose a better estimate for the loss improvement.
        loss_improvements = [self.loss() / n] * n
        if self.space.n_dims > 1:
            return points, loss_improvements
        return [p[0] for p in points], loss_improvements

    @property
    def npoints(self):
        """Number of evaluated points."""
        return len(self.Xi)

    def plot(self, nsamples=200):
        """Plot the evaluated points and the latest model prediction.

        Parameters
        ----------
        nsamples : int, optional
            Number of equally spaced points at which the model is
            evaluated, by default 200.

        Returns
        -------
        The holoviews object combining the scatter of evaluated points,
        the predicted curve and the confidence area, with its ``x``
        range widened by 5% on both sides.

        Raises
        ------
        ValueError
            If the space has more than one dimension.
        """
        hv = ensure_holoviews()
        if self.space.n_dims > 1:
            raise ValueError("Can only plot 1D functions")
        bounds = self.space.bounds[0]
        if not self.Xi:
            p = hv.Scatter([]) * hv.Curve([]) * hv.Area([])
        else:
            scatter = hv.Scatter(([p[0] for p in self.Xi], self.yi))
            if self.models:
                model = self.models[-1]
                xs = np.linspace(*bounds, nsamples)
                xsp = self.space.transform(xs.reshape(-1, 1).tolist())
                y_pred, sigma = model.predict(xsp, return_std=True)
                # Plot model prediction for function
                curve = hv.Curve((xs, y_pred)).opts(line_dash="dashed")
                # Plot 95% confidence interval as colored area around points
                area = hv.Area(
                    (xs, y_pred - 1.96 * sigma, y_pred + 1.96 * sigma),
                    vdims=["y", "y2"],
                ).opts(alpha=0.5, line_alpha=0)
            else:
                area = hv.Area([])
                curve = hv.Curve([])
            p = scatter * curve * area

        # Plot with 5% empty margins such that the boundary points are visible
        margin = 0.05 * (bounds[1] - bounds[0])
        plot_bounds = (bounds[0] - margin, bounds[1] + margin)

        return p.redim(x={"range": plot_bounds})

    def _get_data(self):
        """Return the evaluated points and their values as ``(xs, ys)``.

        For a one-dimensional space each point is reduced to its single
        coordinate. For higher-dimensional spaces the full points are
        kept, so that ``_set_data`` can feed them back through ``tell``.
        """
        if self.space.n_dims > 1:
            xs = [list(x) for x in self.Xi]
        else:
            xs = [x[0] for x in self.Xi]
        return xs, self.yi

    def _set_data(self, data):
        """Load ``(xs, ys)`` as produced by ``_get_data``."""
        xs, ys = data
        self.tell_many(xs, ys)

    def to_dataframe(  # type: ignore[override]
        self,
        with_default_function_args: bool = True,
        function_prefix: str = "function.",
        seed_name: str = "seed",
        y_name: str = "y",
    ) -> pandas.DataFrame:
        """Return the data as a `pandas.DataFrame`.

        Not implemented for this learner; calling it always raises.

        Parameters
        ----------
        with_default_function_args : bool, optional
            Include the ``learner.function``'s default arguments as a
            column, by default True
        function_prefix : str, optional
            Prefix to the ``learner.function``'s default arguments' names,
            by default "function."
        seed_name : str, optional
            Currently unused, by default "seed"
        y_name : str, optional
            Currently unused, by default "y"

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError

    def load_dataframe(  # type: ignore[override]
        self,
        df: pandas.DataFrame,
        with_default_function_args: bool = True,
        function_prefix: str = "function.",
        seed_name: str = "seed",
        y_name: str = "y",
    ):
        """Load data from a `pandas.DataFrame`.

        Not implemented for this learner; calling it always raises.

        Parameters
        ----------
        df : pandas.DataFrame
            The data to load.
        with_default_function_args : bool, optional
            The ``with_default_function_args`` used in ``to_dataframe()``,
            by default True
        function_prefix : str, optional
            The ``function_prefix`` used in ``to_dataframe``,
            by default "function."
        seed_name : str, optional
            Currently unused, by default "seed"
        y_name : str, optional
            Currently unused, by default "y"

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError