Source code for adaptive.learner.average_learner

from __future__ import annotations

from math import sqrt
from typing import Callable

import cloudpickle
import numpy as np

from adaptive.learner.base_learner import BaseLearner
from adaptive.notebook_integration import ensure_holoviews
from adaptive.types import Float, Int, Real
from adaptive.utils import (
    assign_defaults,
    cache_latest,
    partial_function_from_dataframe,
)

try:
    import pandas

    with_pandas = True

except ModuleNotFoundError:
    with_pandas = False


[docs]class AverageLearner(BaseLearner): """A naive implementation of adaptive computing of averages. The learned function must depend on an integer input variable that represents the source of randomness. Parameters ---------- atol : float Desired absolute tolerance. rtol : float Desired relative tolerance. min_npoints : int Minimum number of points to sample. Attributes ---------- data : dict Sampled points and values. pending_points : set Points that still have to be evaluated. npoints : int Number of evaluated points. """ def __init__( self, function: Callable[[int], Real], atol: float | None = None, rtol: float | None = None, min_npoints: int = 2, ) -> None: if atol is None and rtol is None: raise Exception("At least one of `atol` and `rtol` should be set.") if atol is None: atol = np.inf if rtol is None: rtol = np.inf self.data = {} self.pending_points = set() self.function = function # type: ignore self.atol = atol self.rtol = rtol self.npoints = 0 # Cannot estimate standard deviation with fewer than 2 points. self.min_npoints = max(min_npoints, 2) self.sum_f: Real = 0.0 self.sum_f_sq: Real = 0.0
[docs] def new(self) -> AverageLearner: """Create a copy of `~adaptive.AverageLearner` without the data.""" return AverageLearner(self.function, self.atol, self.rtol, self.min_npoints)
@property def n_requested(self) -> int: return self.npoints + len(self.pending_points)
[docs] def to_numpy(self): """Data as NumPy array of size (npoints, 2) with seeds and values.""" return np.array(sorted(self.data.items()))
[docs] def to_dataframe( # type: ignore[override] self, with_default_function_args: bool = True, function_prefix: str = "function.", seed_name: str = "seed", y_name: str = "y", ) -> pandas.DataFrame: """Return the data as a `pandas.DataFrame`. Parameters ---------- with_default_function_args : bool, optional Include the ``learner.function``'s default arguments as a column, by default True function_prefix : str, optional Prefix to the ``learner.function``'s default arguments' names, by default "function." seed_name : str, optional Name of the ``seed`` parameter, by default "seed" y_name : str, optional Name of the output value, by default "y" Returns ------- pandas.DataFrame Raises ------ ImportError If `pandas` is not installed. """ if not with_pandas: raise ImportError("pandas is not installed.") df = pandas.DataFrame(sorted(self.data.items()), columns=[seed_name, y_name]) df.attrs["inputs"] = [seed_name] df.attrs["output"] = y_name if with_default_function_args: assign_defaults(self.function, df, function_prefix) return df
[docs] def load_dataframe( # type: ignore[override] self, df: pandas.DataFrame, with_default_function_args: bool = True, function_prefix: str = "function.", seed_name: str = "seed", y_name: str = "y", ): """Load data from a `pandas.DataFrame`. If ``with_default_function_args`` is True, then ``learner.function``'s default arguments are set (using `functools.partial`) from the values in the `pandas.DataFrame`. Parameters ---------- df : pandas.DataFrame The data to load. with_default_function_args : bool, optional The ``with_default_function_args`` used in ``to_dataframe()``, by default True function_prefix : str, optional The ``function_prefix`` used in ``to_dataframe``, by default "function." seed_name : str, optional The ``seed_name`` used in ``to_dataframe``, by default "seed" y_name : str, optional The ``y_name`` used in ``to_dataframe``, by default "y" """ self.tell_many(df[seed_name].values, df[y_name].values) if with_default_function_args: self.function = partial_function_from_dataframe( self.function, df, function_prefix )
[docs] def ask(self, n: int, tell_pending: bool = True) -> tuple[list[int], list[Float]]: points = list(range(self.n_requested, self.n_requested + n)) if any(p in self.data or p in self.pending_points for p in points): # This means some of the points `< self.n_requested` do not exist. points = list( set(range(self.n_requested + n)) - set(self.data) - set(self.pending_points) )[:n] loss_improvements = [self._loss_improvement(n) / n] * n if tell_pending: for p in points: self.tell_pending(p) return points, loss_improvements
[docs] def tell(self, n: Int, value: Real) -> None: if n in self.data: # The point has already been added before. return self.data[n] = value self.pending_points.discard(n) self.sum_f += value self.sum_f_sq += value**2 self.npoints += 1
[docs] def tell_pending(self, n: int) -> None: self.pending_points.add(n)
@property def mean(self) -> Float: """The average of all values in `data`.""" return self.sum_f / self.npoints @property def std(self) -> Float: """The corrected sample standard deviation of the values in `data`.""" n = self.npoints if n < self.min_npoints: return np.inf numerator = self.sum_f_sq - n * self.mean**2 if numerator < 0: # in this case the numerator ~ -1e-15 return 0 return sqrt(numerator / (n - 1))
[docs] @cache_latest def loss(self, real: bool = True, *, n=None) -> Float: if n is None: n = self.npoints if real else self.n_requested else: n = n if n < self.min_npoints: return np.inf standard_error = self.std / sqrt(n) aloss = standard_error / self.atol rloss = standard_error / self.rtol mean = self.mean if mean != 0: rloss /= abs(mean) return max(aloss, rloss)
def _loss_improvement(self, n: int) -> Float: loss = self.loss() if np.isfinite(loss): return loss - self.loss(n=self.npoints + n) else: return np.inf
[docs] def remove_unfinished(self): """Remove uncomputed data from the learner.""" self.pending_points = set()
[docs] def plot(self): """Returns a histogram of the evaluated data. Returns ------- holoviews.element.Histogram A histogram of the evaluated data.""" hv = ensure_holoviews() vals = [v for v in self.data.values() if v is not None] if not vals: return hv.Histogram([[], []]) num_bins = int(max(5, sqrt(self.npoints))) vals = hv.Points(vals) return hv.operation.histogram(vals, num_bins=num_bins, dimension="y")
def _get_data(self) -> tuple[dict[int, Real], int, Real, Real]: return (self.data, self.npoints, self.sum_f, self.sum_f_sq) def _set_data(self, data: tuple[dict[int, Real], int, Real, Real]) -> None: self.data, self.npoints, self.sum_f, self.sum_f_sq = data def __getstate__(self): return ( cloudpickle.dumps(self.function), self.atol, self.rtol, self.min_npoints, self._get_data(), ) def __setstate__(self, state): function, atol, rtol, min_npoints, data = state function = cloudpickle.loads(function) self.__init__(function, atol, rtol, min_npoints) self._set_data(data)