Solution of a least-squares problem using Bayesian optimization

Driver:

BayesianLeastSquares

Download script:

bayesian_least_squares.py

This example demonstrates the solution of a non-linear least-squares problem from the NIST Statistical Reference Datasets. As a test case, the MGH17 problem is considered, which consists of fitting a vectorial model \(\mathbf{f}(\mathbf{b}) \in \mathbb{R}^{33}\) with entries

\[f_i(\mathbf{b}) = b_1 + b_2 \exp(-i \cdot b_4) + b_3 \exp(-i \cdot b_5),\, i = 0, \dots, 32\]

to a target vector with 33 entries. Compared to the original NIST formulation, in which the predictor takes the values \(x_i = 10\,i\), the problem is rescaled here to the integer index \(i\), so that \(b_4\), \(b_5\), and their uncertainties are ten times the NIST-certified values. The certified best-fit values of the rescaled problem are

\[
\begin{aligned}
b_1 &= 0.3754100521 \pm 2.0723153551 \cdot 10^{-3}\\
b_2 &= 1.9358469127 \pm 0.22031669222\\
b_3 &= -1.4646871366 \pm 0.22175707739\\
b_4 &= 0.1286753464 \pm 4.4861358114 \cdot 10^{-3}\\
b_5 &= 0.2212269966 \pm 8.9471996575 \cdot 10^{-3}
\end{aligned}
\]
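
As a quick consistency check (a minimal sketch assuming only NumPy; not part of the downloadable script), evaluating the model at the certified minimum reproduces the certified residual sum of squares of about \(5.46 \cdot 10^{-5}\):

import numpy as np

# certified minimum of the rescaled MGH17 problem (values from above)
b = np.array([0.3754100521, 1.9358469127, -1.4646871366,
              0.1286753464, 0.2212269966])
i = np.arange(33)
f = b[0] + b[1]*np.exp(-i*b[3]) + b[2]*np.exp(-i*b[4])
# MGH17 target vector (the same 33 values as in the script below)
target = np.array([
    8.44E-01, 9.08E-01, 9.32E-01, 9.36E-01, 9.25E-01,
    9.08E-01, 8.81E-01, 8.50E-01, 8.18E-01, 7.84E-01,
    7.51E-01, 7.18E-01, 6.85E-01, 6.58E-01, 6.28E-01,
    6.03E-01, 5.80E-01, 5.58E-01, 5.38E-01, 5.22E-01,
    5.06E-01, 4.90E-01, 4.78E-01, 4.67E-01, 4.57E-01,
    4.48E-01, 4.38E-01, 4.31E-01, 4.24E-01, 4.20E-01,
    4.14E-01, 4.11E-01, 4.06E-01
])
print(np.sum((f - target)**2))  # approximately 5.4649e-05

The complete example script reads: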
import sys, os
import numpy as np
import time
import pandas as pd
import torch
import matplotlib.pyplot as plt
import corner  # run "pip install corner" if not installed
import emcee  # run "pip install emcee" if not installed


jcm_optimizer_path = r"<JCM_OPTIMIZER_PATH>"
sys.path.insert(0, os.path.join(jcm_optimizer_path, "interface", "python"))
from jcmoptimizer import Server, Client, Study, Observation
server = Server()
client = Client(server.host)

# Definition of the search domain
design_space = [
    {'name': 'b1', 'type': 'continuous', 'domain': (0, 10)},
    {'name': 'b2', 'type': 'continuous', 'domain': (0.1, 4)},
    {'name': 'b3', 'type': 'continuous', 'domain': (-4, -0.1)},
    {'name': 'b4', 'type': 'continuous', 'domain': (0.05, 1)},
    {'name': 'b5', 'type': 'continuous', 'domain': (0.05, 1)}
]
# Inequality constraint restricting the search domain
constraints = [
    {'name': 'test', 'expression': 'b2 + b3 <= 1.0'}
]

# Creation of the study object with study_id 'bayesian_least_squares'
study = client.create_study(
    design_space=design_space,
    constraints=constraints,
    driver="BayesianLeastSquares",
    name="Solution of a least-squares problem using Bayesian optimization",
    study_id="bayesian_least_squares"
)

# The vectorial model function of the MGH17 problem
def model(x: torch.Tensor) -> torch.Tensor:
    s = torch.arange(33)
    return x[0] + x[1]*torch.exp(-s*x[3]) + x[2]*torch.exp(-s*x[4])

# Target vector of the MGH17 problem
target = torch.tensor([
    8.44E-01, 9.08E-01, 9.32E-01, 9.36E-01, 9.25E-01,
    9.08E-01, 8.81E-01, 8.50E-01, 8.18E-01, 7.84E-01,
    7.51E-01, 7.18E-01, 6.85E-01, 6.58E-01, 6.28E-01,
    6.03E-01, 5.80E-01, 5.58E-01, 5.38E-01, 5.22E-01,
    5.06E-01, 4.90E-01, 4.78E-01, 4.67E-01, 4.57E-01,
    4.48E-01, 4.38E-01, 4.31E-01, 4.24E-01, 4.20E-01,
    4.14E-01, 4.11E-01, 4.06E-01
])

study.configure(
    target_vector=target.tolist(),
    max_iter=120,
)

# Evaluation of the black-box function for specified design parameters
def evaluate(study: Study, b1: float, b2: float, b3: float, b4: float, b5: float) -> Observation:
    observation = study.new_observation()
    # tensor of design values to reconstruct
    x = torch.tensor([b1, b2, b3, b4, b5])
    observation.add(model(x).tolist())
    return observation

# Run the minimization
study.set_evaluator(evaluate)
study.run()

best_sample = study.driver.best_sample
min_chisq = study.driver.min_objective
uncertainties = study.driver.uncertainties
print(f"Reconstructed parameters with chi-squared value {min_chisq:.4e}:")
for param in design_space:
    name = param['name']
    print(f"  {name} = {best_sample[name]:.3f} +/- {uncertainties[name]:.3f}")
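
# Optional sanity check (not part of the original script): compare the
# reconstruction against the certified values quoted above.
certified = {'b1': 0.3754100521, 'b2': 1.9358469127, 'b3': -1.4646871366,
             'b4': 0.1286753464, 'b5': 0.2212269966}
for name, value in certified.items():
    print(f"  {name}: deviation from certified value = {best_sample[name] - value:+.3e}")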

# Before running a Markov-chain Monte-Carlo (MCMC) sampling we converge the surrogate
# models by sampling around the minimum. To make the study more explorative, the
# scaling parameter is increased and the effective number of degrees of freedom is
# set to one.
study.configure(
    scaling=10.0,
    effective_DOF=1.0,
    min_uncertainty=min_chisq*1e-8,
    max_iter=150,
    min_val=0.0
)
study.run()

# Run the MCMC sampling on the trained surrogate with 32 walkers
num_walkers, max_iter = 32, 10000
mcmc_result = study.driver.run_mcmc(
    rel_error=0.01,
    num_walkers=num_walkers,
    max_iter=max_iter
)

# certified minimum of the (rescaled) MGH17 problem
minimum = torch.tensor([3.7541005211E-01, 1.9358469127E+00, -1.4646871366E+00,
                        1.2867534640E-01, 2.2122699662E-01])
fig = corner.corner(
    np.array(mcmc_result['samples']),
    quantiles=(0.16, 0.5, 0.84),
    levels=(1-np.exp(-1.0), 1-np.exp(-0.5)),
    show_titles=True, scale_hist=False,
    title_fmt=".3f",
    labels=[d['name'] for d in design_space],
    truths=minimum.numpy()
)
plt.savefig("corner_surrogate.svg", transparent=True)
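
# Optional (not part of the original script): summarize the surrogate-based
# posterior by the median and the 16%/84% quantiles of each marginal.
surrogate_samples = np.array(mcmc_result['samples'])
for k, d in enumerate(design_space):
    lo, med, hi = np.percentile(surrogate_samples[:, k], [16, 50, 84])
    print(f"  {d['name']} = {med:.3f} (+{hi - med:.3f} / -{med - lo:.3f})")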

# As a comparison, we run the MCMC sampling directly on the analytic model.
p0 = 0.05*np.random.randn(num_walkers, len(design_space))
p0 += minimum.numpy()

min_chisq_cert = 5.4648946975E-05  # certified minimum of the MGH17 problem
# residual standard error sqrt(chisq/DOF) to scale measurement uncertainties
RSE = np.sqrt(min_chisq_cert/(len(target) - len(design_space)))

# log-probability function
def log_prob(x):
    out = -0.5*np.sum(((model(torch.tensor(x)) - target)/RSE).numpy()**2)
    if np.isnan(out):
        return -np.inf
    return out

sampler = emcee.EnsembleSampler(
    nwalkers=num_walkers, ndim=len(design_space), log_prob_fn=log_prob
)

# burn-in phase
state = sampler.run_mcmc(p0, 100)
sampler.reset()
# actual MCMC sampling
sampler.run_mcmc(state, max_iter, progress=True)
samples = sampler.get_chain(flat=True)
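
# Optional convergence check (not part of the original script): emcee's estimate
# of the integrated autocorrelation time; the chain should be much longer than
# roughly 50 times this value for reliable statistics.
try:
    tau = sampler.get_autocorr_time()
    print("Integrated autocorrelation times:", tau)
except emcee.autocorr.AutocorrError as err:
    print("Chain too short for a reliable estimate:", err)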
fig = corner.corner(
    samples, quantiles=(0.16, 0.5, 0.84),
    levels=(1-np.exp(-1.0), 1-np.exp(-0.5)),
    show_titles=True, scale_hist=False,
    title_fmt=".3f",
    labels=[d['name'] for d in design_space],
    truths=minimum.numpy()
)
plt.savefig("corner_analytic.svg", transparent=True)
MCMC sampling based on analytic model

Markov-Chain Monte-Carlo (MCMC) sampling of the probability density of the parameters \(b_1,\dots,b_5\) based on the analytic model function.

MCMC sampling based on surrogate

Markov-Chain Monte-Carlo (MCMC) sampling of the probability density of the parameters \(b_1,\dots,b_5\) based on the trained surrogate of the study. A comparison between the analytic and the surrogate model functions shows good quantitative agreement.
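
The agreement can also be checked numerically. The following short sketch is a hypothetical addition (not part of the downloadable script) that reuses the samples array and the mcmc_result dictionary from the script above and compares the marginal medians and standard deviations of both sample sets:

# compare analytic and surrogate-based MCMC samples per parameter
surrogate = np.array(mcmc_result['samples'])
for k, d in enumerate(design_space):
    print(f"{d['name']}: "
          f"median (analytic/surrogate) = {np.median(samples[:, k]):.4f}/"
          f"{np.median(surrogate[:, k]):.4f}, "
          f"std (analytic/surrogate) = {samples[:, k].std():.4f}/"
          f"{surrogate[:, k].std():.4f}")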