Different Instances of IterGP¶
IterGP defines a whole family of GP approximation methods, whose approximate posterior mean may converge at different speeds. However, its uncertainty always exactly quantifies the relative distance to the latent function, analogous to an exact Gaussian process. Here, we will investigate different instances of IterGP and how they compare.
[1]:
from IPython.display import HTML
from matplotlib import animation
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='svg'
[2]:
import sys
sys.path.insert(
0, "../../../../experiments"
) # Needed to import other GP approximations
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
import models
import probnum
from probnum import backend, linops, randvars
from probnum.linalg.solvers import ProbabilisticLinearSolver, information_ops
from probnum.randprocs import kernels, mean_fns
import pykeops
from itergp import GaussianProcess, datasets, methods
from itergp.methods import belief_updates, policies, stopping_criteria
pykeops.verbose = False
Uncertainty Decomposition¶
First, we’ll illustrate how the reduction in computational uncertainty varies between different IterGP instances.
[3]:
# Generate data
rng_state = backend.random.rng_state(42)
num_data = 10
input_shape = ()
output_shape = ()
rng_state, rng_state_data = backend.random.split(rng_state, num=2)
data = datasets.SyntheticDataset(
rng_state=rng_state,
size=(num_data, num_data),
input_shape=input_shape,
output_shape=output_shape,
noise_var=0.1,
)
X = data.train.X
y = data.train.y
[4]:
# Model
mean_fn = mean_fns.Zero(input_shape=input_shape, output_shape=output_shape)
kernel = kernels.Matern(input_shape=input_shape, nu=1.5, lengthscale=0.15)
gp = GaussianProcess(mean_fn, kernel)
# Likelihood
sigma_sq = 0.1
noise = randvars.Normal(
mean=backend.zeros(y.shape),
cov=linops.Scaling(sigma_sq, shape=(num_data, num_data)),
)
[5]:
# Prediction
Xnew = backend.linspace(-1.5, 1.5, 1000)
gp_post_true = gp.condition_on_data(X, y, b=noise, approx_method=methods.Cholesky())
[6]:
# Approximation methods setup
pseudo_inputs = backend.asarray(
[-1.0, -0.5, 0.25, 1.0, 0.0, 0.75, -0.75, -1.25, -0.25, 0.5]
)
class AdverserialSolver(ProbabilisticLinearSolver):
def __init__(
self,
atol: float = 1e-5,
rtol: float = 1e-5,
maxiter: int = None,
):
super().__init__(
policy=policies.AdverserialPolicy(policies.UnitVectorPolicy()),
information_op=information_ops.ProjectedResidualInformationOp(),
belief_update=belief_updates.ProjectedResidualBeliefUpdate(),
stopping_criterion=stopping_criteria.MaxIterationsStoppingCriterion(
maxiter=maxiter,
)
| probnum.linalg.solvers.stopping_criteria.ResidualNormStoppingCriterion(
atol=atol, rtol=rtol
),
)
# Plot setup
fig, axs = plt.subplots(
nrows=6,
ncols=2,
figsize=(14, 14),
sharex=True,
squeeze=False,
gridspec_kw={"height_ratios": [2, 1, 2, 1, 2, 1]},
)
fig.patch.set_alpha(0.0) # set figure background opacity to 0
plt.close()
def animate(i):
for ax in axs.flatten():
ax.cla()
# Cholesky
gp_post = gp.condition_on_data(
X, y, b=noise, approx_method=methods.Cholesky(maxrank=i)
)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[0, 0]
)
axs[0, 0].set(ylim=[-2.75, 2.75], ylabel="$y$", title="Cholesky")
axs[1, 0].fill_between(
x=Xnew,
y1=backend.zeros_like(Xnew),
y2=gp_post_true.var(Xnew),
lw=0.0,
alpha=0.4,
color="C0",
)
axs[1, 0].fill_between(
x=Xnew,
y1=gp_post_true.var(Xnew),
y2=gp_post.var(Xnew),
lw=0.25,
alpha=0.4,
color="C2",
)
axs[1, 0].set(xlabel="$x$", ylabel="Variance")
axs[1, 0].set_ylim(bottom=0)
# Pseudo-Input
pseudo_input_method = methods.PseudoInput(pseudo_inputs=pseudo_inputs[0:i])
gp_post = gp.condition_on_data(X, y, b=noise, approx_method=pseudo_input_method)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[0, 1]
)
axs[0, 1].set(ylabel="$y$", title="Pseudo-Input")
data_range = X.max() - X.min()
data_width = 0.015 * data_range
for j in [0, 1]:
for z in pseudo_input_method.pseudo_inputs:
axs[j, 1].axvspan(
xmin=z - 0.5 * data_width,
xmax=z + 0.5 * data_width,
color="black",
alpha=0.2,
zorder=-10,
lw=0.0,
)
axs[1, 1].fill_between(
x=Xnew,
y1=backend.zeros_like(Xnew),
y2=gp_post_true.var(Xnew),
lw=0.0,
alpha=0.4,
color="C0",
)
axs[1, 1].fill_between(
x=Xnew,
y1=gp_post_true.var(Xnew),
y2=gp_post.var(Xnew),
lw=0.25,
alpha=0.4,
color="C2",
)
axs[1, 1].set(xlabel="$x$", ylabel="Variance")
axs[1, 1].set_ylim(bottom=0)
# Conjugate Gradient
gp_post = gp.condition_on_data(X, y, b=noise, approx_method=methods.CG(maxiter=i))
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[2, 0]
)
axs[2, 0].set(ylim=[-2.75, 2.75], ylabel="$y$", title="CG")
axs[3, 0].fill_between(
x=Xnew,
y1=backend.zeros_like(Xnew),
y2=gp_post_true.var(Xnew),
lw=0.0,
alpha=0.4,
color="C0",
)
axs[3, 0].fill_between(
x=Xnew,
y1=gp_post_true.var(Xnew),
y2=gp_post.var(Xnew),
lw=0.25,
alpha=0.4,
color="C2",
)
axs[3, 0].set(xlabel="$x$", ylabel="Variance")
axs[3, 0].set_ylim(bottom=0)
# Spectral (descending)
gp_post = gp.condition_on_data(
X, y, b=noise, approx_method=methods.PBR(maxiter=i, descending=True)
)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[2, 1]
)
axs[2, 1].set(ylim=[-2.75, 2.75], ylabel="$y$", title="Spectral (Descending)")
axs[3, 1].fill_between(
x=Xnew,
y1=backend.zeros_like(Xnew),
y2=gp_post_true.var(Xnew),
lw=0.0,
alpha=0.4,
color="C0",
)
axs[3, 1].fill_between(
x=Xnew,
y1=gp_post_true.var(Xnew),
y2=gp_post.var(Xnew),
lw=0.25,
alpha=0.4,
color="C2",
)
axs[3, 1].set(xlabel="$x$", ylabel="Variance")
axs[3, 1].set_ylim(bottom=0)
# Spectral (ascending)
gp_post = gp.condition_on_data(
X, y, b=noise, approx_method=methods.PBR(maxiter=i, descending=False)
)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[4, 1]
)
axs[4, 1].set(ylim=[-2.75, 2.75], ylabel="$y$", title="Spectral (Ascending)")
axs[5, 1].fill_between(
x=Xnew,
y1=backend.zeros_like(Xnew),
y2=gp_post_true.var(Xnew),
lw=0.0,
alpha=0.4,
color="C0",
)
axs[5, 1].fill_between(
x=Xnew,
y1=gp_post_true.var(Xnew),
y2=gp_post.var(Xnew),
lw=0.25,
alpha=0.4,
color="C2",
)
axs[5, 1].set(xlabel="$x$", ylabel="Variance")
axs[5, 1].set_ylim(bottom=0)
# Spectral (ascending)
gp_post = gp.condition_on_data(
X, y, b=noise, approx_method=AdverserialSolver(maxiter=i)
)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[4, 0]
)
axs[4, 0].set(ylim=[-2.75, 2.75], ylabel="$y$", title="Adverserial")
axs[5, 0].fill_between(
x=Xnew,
y1=backend.zeros_like(Xnew),
y2=gp_post_true.var(Xnew),
lw=0.0,
alpha=0.4,
color="C0",
)
axs[5, 0].fill_between(
x=Xnew,
y1=gp_post_true.var(Xnew),
y2=gp_post.var(Xnew),
lw=0.25,
alpha=0.4,
color="C2",
)
axs[5, 0].set(xlabel="$x$", ylabel="Variance")
axs[5, 0].set_ylim(bottom=0)
# True GP Posterior
for m in range(axs.shape[0]):
if m % 2 == 0:
for j in range(axs.shape[1]):
axs[m, j].plot(
Xnew,
gp_post_true.mean(Xnew),
lw=1,
color="black",
zorder=-1,
label="True GP mean",
)
axs[m, j].plot(
Xnew,
gp_post_true.mean(Xnew) + 2 * gp_post_true.std(Xnew),
linestyle="--",
lw=1,
color="black",
zorder=-1,
)
axs[m, j].plot(
Xnew,
gp_post_true.mean(Xnew) - 2 * gp_post_true.std(Xnew),
linestyle="--",
lw=1,
color="black",
zorder=-1,
)
fig.suptitle(f"Iteration {i}")
fig.tight_layout()
from IPython.display import HTML
from matplotlib import animation
# Create animation
anim = animation.FuncAnimation(
fig, func=animate, frames=num_data + 1, interval=1250, repeat_delay=4000, blit=False
)
# Create interactive plot
HTML(anim.to_jshtml())
[6]:
Comparison of Different Approximation Methods¶
We can define a (quadratically expensive) IterGP policy that is very similar in its posterior mean to SVGP and Nyström. To better understand the difference (for a fixed set of inducing points), let’s compare to these classic inducing point methods.
[7]:
# Plot setup
fig, axs = plt.subplots(nrows=5, ncols=2, figsize=(14, 14), sharex=True, sharey=True)
fig.patch.set_alpha(0.0) # set figure background opacity to 0
plt.close()
def animate(i):
for ax in axs.flatten():
ax.cla()
# Cholesky
gp_post = gp.condition_on_data(
X, y, b=noise, approx_method=methods.Cholesky(maxrank=i)
)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[0, 0]
)
axs[0, 0].set(ylim=[-2.75, 2.75], ylabel="$y$", title="Cholesky")
# CG
gp_post = gp.condition_on_data(X, y, b=noise, approx_method=methods.CG(maxiter=i))
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[0, 1]
)
axs[0, 1].set(ylabel="$y$", title="CG")
# Pseudo-Input
pseudo_input_method = methods.PseudoInput(pseudo_inputs=pseudo_inputs[0:i])
gp_post = gp.condition_on_data(X, y, b=noise, approx_method=pseudo_input_method)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[1, 0]
)
axs[1, 0].set(ylabel="$y$", title="Pseudo-Input")
data_range = X.max() - X.min()
data_width = 0.015 * data_range
for z in pseudo_input_method.pseudo_inputs:
axs[1, 0].axvspan(
xmin=z - 0.5 * data_width,
xmax=z + 0.5 * data_width,
color="black",
alpha=0.2,
zorder=-10,
lw=0.0,
)
# Projected Bayes regressor
pbr_method = methods.PBR(maxiter=i, descending=True)
gp_post = gp.condition_on_data(X, y, b=noise, approx_method=pbr_method)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[1, 1]
)
axs[1, 1].set(ylabel="$y$", title="Projected Bayes Regressor")
# SVGP
svgp = models.SVGP(prior=gp, X=X, Y=y, b=noise, inducing_points=pseudo_inputs[0:i])
svgp.plot(X=Xnew, data=(X, y), ax=axs[2, 0])
axs[2, 0].set(ylabel="$y$", title="SVGP")
for z in pseudo_input_method.pseudo_inputs:
axs[2, 0].axvspan(
xmin=z - 0.5 * data_width,
xmax=z + 0.5 * data_width,
color="black",
alpha=0.2,
zorder=-10,
lw=0.0,
)
# Nyström
nygp = models.NystroemGP(
prior=gp, X=X, Y=y, b=noise, inducing_points=pseudo_inputs[0:i]
)
nygp.plot(X=Xnew, data=(X, y), ax=axs[2, 1])
axs[2, 1].set(ylabel="$y$", title="Nyström / SoR")
for z in pseudo_input_method.pseudo_inputs:
axs[2, 1].axvspan(
xmin=z - 0.5 * data_width,
xmax=z + 0.5 * data_width,
color="black",
alpha=0.2,
zorder=-10,
lw=0.0,
)
# Mixed strategy (PseudoInput + CG)
mixed_method = methods.MixedStrategy(
base_policies=(
policies.PseudoInputPolicy(pseudo_inputs=pseudo_inputs[0:i]),
policies.GradientPolicy(),
),
iters=(4,),
maxiter=i,
)
gp_post = gp.condition_on_data(X, y, b=noise, approx_method=mixed_method)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[3, 0]
)
axs[3, 0].set(ylabel="$y$", title="Mixed (PseudoInput + CG)")
for z in pseudo_input_method.pseudo_inputs:
axs[3, 0].axvspan(
xmin=z - 0.5 * data_width,
xmax=z + 0.5 * data_width,
color="black",
alpha=0.2,
zorder=-10,
lw=0.0,
)
# Mixed strategy (Cholesky + CG)
mixed_method = methods.MixedStrategy(
base_policies=(policies.UnitVectorPolicy(), policies.GradientPolicy()),
iters=(4,),
maxiter=i,
)
gp_post = gp.condition_on_data(X, y, b=noise, approx_method=mixed_method)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[3, 1]
)
axs[3, 1].set(ylabel="$y$", title="Mixed (Cholesky + CG)")
# Mixed strategy (CG + PBR)
mixed_method = methods.MixedStrategy(
base_policies=(policies.GradientPolicy(), policies.EigenvectorPolicy()),
iters=(4,),
maxiter=i,
)
gp_post = gp.condition_on_data(X, y, b=noise, approx_method=mixed_method)
gp_post.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=True, ax=axs[4, 0]
)
axs[4, 0].set(xlabel="$X$", ylabel="$y$", title="Mixed (CG + PBR)")
# Exact
gp_post_true.plot(
X=Xnew, data=(X, y), samples=0, computational_predictive=False, ax=axs[4, 1]
)
axs[4, 1].set(xlabel="$X$", ylabel="$y$", title="Mathematical Posterior")
# True GP Posterior
for m in range(axs.shape[0]):
for j in range(axs.shape[1]):
axs[m, j].plot(
Xnew,
gp_post_true.mean(Xnew),
lw=1,
color="black",
zorder=-1,
label="True GP mean",
)
axs[m, j].plot(
Xnew,
gp_post_true.mean(Xnew) + 2 * gp_post_true.std(Xnew),
linestyle="--",
lw=1,
color="black",
zorder=-1,
)
axs[m, j].plot(
Xnew,
gp_post_true.mean(Xnew) - 2 * gp_post_true.std(Xnew),
linestyle="--",
lw=1,
color="black",
zorder=-1,
)
fig.suptitle(f"Iteration {i}")
fig.tight_layout()
from IPython.display import HTML
from matplotlib import animation
# Create animation
anim = animation.FuncAnimation(
fig, func=animate, frames=num_data + 1, interval=1250, repeat_delay=4000, blit=False
)
# Create interactive plot
HTML(anim.to_jshtml())
[7]:
[ ]: