Bayesian parameter reconstruction using Bayesian optimization
- Driver: BayesianReconstruction
- Download script:
The target of the study is to showcase the solution of a Bayesian parameter reconstruction problem based on a set of measurements \(\mathbf{t} \in \mathbb{R}^K\).
We assume that the measurement process can be accurately modeled by a function \(\mathbf{f}(\mathbf{b}) \in \mathbb{R}^K\) of a design parameter vector \(\mathbf{b}\). As an example we consider the MGH17 problem, which consists of fitting a vectorial model \(\mathbf{f}(\mathbf{b}) \in \mathbb{R}^{33}\) with entries
\[f_i(\mathbf{b}) = b_1 + b_2 e^{-s_i b_4} + b_3 e^{-s_i b_5}, \qquad s_i = i - 1,\ i = 1,\dots,33,\]
to a measurement vector with \(K = 33\) entries.
Due to random measurement noise \(\mathbf{w} \sim \mathcal{N}(0, {\rm diag}[\eta_1^2,\dots,\eta_K^2])\), the vector of measurements \(\mathbf{t} = \mathbf{f}(\mathbf{b}) + \mathbf{w}\) is a random vector with probability density
\[P(\mathbf{t} \,|\, \mathbf{b}) = \prod_{i=1}^{K} \frac{1}{\sqrt{2\pi\eta_i^2}} \exp\left(-\frac{[t_i - f_i(\mathbf{b})]^2}{2\eta_i^2}\right).\]
The noise variances \(\eta_i^2\) are assumed to be unknown and are estimated from the measurement data. To this end we assume that the variance is composed of a background term \(c_1^2\) and a noise contribution whose standard deviation scales linearly with \(f_i(\mathbf{b})\), i.e.
\[\eta_i^2 = c_1^2 + [c_2 f_i(\mathbf{b})]^2.\]
Taking non-uniform prior distributions \(P_\text{prior}(\mathbf{b})\) for the design parameter vector and \(P_\text{prior}(\mathbf{c})\) for the error model parameters into account, the posterior distribution is \(P(\mathbf{b}, \mathbf{c} \,|\, \mathbf{t}) \propto P(\mathbf{t} \,|\, \mathbf{b}, \mathbf{c}) P_\text{prior}(\mathbf{b}) P_\text{prior}(\mathbf{c})\).
Altogether, finding the parameters with maximum posterior probability density is equivalent to minimizing the negative log-probability
\[-\log P(\mathbf{b}, \mathbf{c} \,|\, \mathbf{t}) = \frac{1}{2}\sum_{i=1}^{K}\left[\log(2\pi\eta_i^2) + \frac{[t_i - f_i(\mathbf{b})]^2}{\eta_i^2}\right] - \log P_\text{prior}(\mathbf{b}) - \log P_\text{prior}(\mathbf{c}) + \text{const}.\]
In the following, we show how this negative log-probability can be minimized using the BayesianReconstruction driver. We compare the results of the driver's MCMC sampling of the posterior probability to an MCMC sampling based on the analytic model function.
import sys, os
import numpy as np
import time
from scipy.stats import multivariate_normal, uniform
import pandas as pd
import torch
torch.set_default_dtype(torch.float64)
import matplotlib.pyplot as plt
import corner #run "pip install corner" if not installed
import emcee #run "pip install emcee" if not installed


from jcmoptimizer import Server, Client, Study, Observation
server = Server()
client = Client(host=server.host)

# Definition of the search domain
design_space = [
    {'name': 'b1', 'type': 'continuous', 'domain': (0,10)},
    {'name': 'b2', 'type': 'continuous', 'domain': (0.1,4)},
    {'name': 'b3', 'type': 'continuous', 'domain': (-4,-0.1)},
    {'name': 'b4', 'type': 'continuous', 'domain': (0.05,1)},
    {'name': 'b5', 'type': 'continuous', 'domain': (0.05,1)}
]

# Creation of the study object with study_id 'bayesian_reconstruction'
study = client.create_study(
    design_space=design_space,
    driver="BayesianReconstruction",
    study_name="Bayesian parameter reconstruction using Bayesian optimization",
    study_id="bayesian_reconstruction"
)
#The vectorial model function of the MGH17 problem
def model(x: torch.Tensor) -> torch.Tensor:
    s = torch.arange(33)
    return x[0] + x[1]*torch.exp(-s*x[3]) + x[2]*torch.exp(-s*x[4])

#The forward model parameters b to be reconstructed
b_true = torch.tensor([3.7541005211E-01, 1.9358469127E+00, -1.4646871366E+00,
                       1.2867534640E-01, 2.2122699662E-01])

#The error model parameters in log-space to be reconstructed
log_c1, log_c2 = np.log(0.005), np.log(0.01)

#The error model, i.e. the noise stddev depending on the model value y=f(b)
def error_model(log_c1: float, log_c2: float, y: torch.Tensor) -> torch.Tensor:
    return torch.sqrt( np.exp(log_c1)**2 + (np.exp(log_c2)*y)**2)

#Generate a random target vector of measurements
torch.manual_seed(0)
model_vector = model(b_true)
err = error_model(log_c1, log_c2, model_vector)
measurement_vector = model_vector + err*torch.randn(model_vector.shape)

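# Optional sanity check (added illustration, not part of the original script):
# plot the noise-free model curve together with the noisy synthetic
# measurements and their error bars before configuring the study.
s_i = np.arange(33)
plt.figure()
plt.errorbar(s_i, measurement_vector.numpy(), yerr=err.numpy(), fmt='.',
             label='noisy measurements t')
plt.plot(s_i, model_vector.numpy(), label='noise-free model f(b_true)')
plt.xlabel('measurement index i')
plt.ylabel('f_i')
plt.legend()
plt.savefig("measurements.svg", transparent=True)
plt.close()
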
study.configure(
    max_iter=80,
    target_vector=measurement_vector.tolist(),
    error_model=dict(
        #error model expression
        expression='sqrt(exp(log_c1)^2 + (exp(log_c2)*y_model)^2)',
        #prior distribution of error model parameters
        distributions=[
            {'type': 'normal', 'parameter': 'log_c1', 'mean': -5.0, 'stddev': 1},
            {'type': 'normal', 'parameter': 'log_c2', 'mean': -4.0, 'stddev': 1},
        ],
        #initial values and parameter bounds for fitting the error model parameters
        initial_parameters=[-5.0, -4.0],
        parameter_bounds=[(-7,-2), (-6.0,-1)]
    ),
    #Multivariate normal prior distribution of forward model parameters.
    #Unspecified parameters (b1,b4,b5) are uniformly distributed in the design space
    parameter_distribution=dict(
        distributions=[
            dict(type='mvn', parameters=['b2','b3'], mean=[2.25,-2.0],
                 covariance=[[0.5,-0.01], [-0.01,0.5]]),
        ]
    )
)
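# Note (added comment): the error-model expression configured above mirrors the
# Python error_model() function, with y_model denoting the model value f_i(b),
# so the driver fits the same noise model that was used to generate the data.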
# Evaluation of the black-box function for specified design parameters
def evaluate(study: Study, b1: float, b2: float, b3: float, b4: float, b5: float) -> Observation:

    observation = study.new_observation()
    #tensor of design values to reconstruct
    x = torch.tensor([b1, b2, b3, b4, b5])
    observation.add(model(x).tolist())

    return observation

# Run the minimization
study.set_evaluator(evaluate)
study.run()
#best sample of forward model parameters b
best_b_sample = study.driver.best_sample
#minimum of negative log-probability
min_neg_log_prob = study.driver.min_objective

#determine sample [b1, b2, b3, b4, b5, log_c1, log_c2] that minimizes the negative
#log-probability
minimum = list(best_b_sample.values())

#path to negative log-probability variable
path = "driver.acquisition_function.main_objective.variable"
neg_log_probs = study.historic_parameter_values(f"{path}.observed_value")
idx = np.argmin(neg_log_probs)

logs_c1 = study.historic_parameter_values(f"{path}.error_model_parameters.log_c1")
logs_c2 = study.historic_parameter_values(f"{path}.error_model_parameters.log_c2")

minimum += [logs_c1[idx], logs_c2[idx]]
minimum = np.array(minimum)

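# Optional check (added illustration, not part of the original script): compare
# the reconstructed parameters at the posterior maximum with the ground-truth
# values used to generate the synthetic measurements.
print("reconstructed [b1..b5, log_c1, log_c2]:", minimum)
print("true values   [b1..b5, log_c1, log_c2]:",
      np.concatenate([b_true.numpy(), [log_c1, log_c2]]))
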
# Before running a Markov-chain Monte-Carlo (MCMC) sampling we converge the surrogate
# models by sampling around the minimum. To make the study more explorative, the
# scaling parameter is increased and the number of effective degrees of freedom is set to one.
study.configure(
    scaling=10.0,
    effective_DOF=1.0,
    min_uncertainty=1e-8*np.abs(min_neg_log_prob),
    max_iter=120,
)
study.run()

# Run the MCMC sampling with 32 walkers
num_walkers, max_iter = 32, 20000
mcmc_result = study.driver.run_mcmc(
    rel_error=0.01,
    num_walkers=num_walkers,
    max_iter=max_iter
)
fig = corner.corner(
    np.array(mcmc_result['samples']),
    quantiles=(0.16, 0.5, 0.84),
    levels=(1-np.exp(-1.0), 1-np.exp(-0.5)),
    show_titles=True, scale_hist=False,
    title_fmt=".3f",
    labels=[d['name'] for d in design_space] + ["log_c1", "log_c2"],
    truths=minimum
)
plt.savefig("corner_surrogate.svg", transparent=True)

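# Optional post-processing sketch (added illustration, not part of the original
# script): median and 68% credible interval of each parameter from the
# surrogate-based MCMC samples.
surrogate_samples = np.array(mcmc_result['samples'])
names = [d['name'] for d in design_space] + ["log_c1", "log_c2"]
lower, median, upper = np.percentile(surrogate_samples, [16, 50, 84], axis=0)
for name, lo, med, up in zip(names, lower, median, upper):
    print(f"{name}: {med:.3f} (+{up - med:.3f} / -{med - lo:.3f})")
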
# As a comparison, we run the MCMC sampling directly on the analytic model.
p0 = 0.05*np.random.randn(num_walkers, len(design_space)+2)
p0 += minimum

# Uniform prior domain for the model parameters b1, b4, b5
uniform_domain = np.array([design_space[idx]["domain"] for idx in [0,3,4]])

#log probability function
def log_prob(x: np.ndarray) -> float:

    #evaluate the model and the error model for the proposed parameters
    y = model(torch.as_tensor(x[:5]))
    res = y - measurement_vector
    err = error_model(x[5], x[6], y)

    #log-likelihood
    ll = -0.5*(
        torch.log(2*torch.tensor(torch.pi)) +
        torch.log(err**2) +
        (res/err)**2
    ).sum()

    #log prior
    lp = (
        multivariate_normal.logpdf(
            x[1:3],
            mean=[2.25,-2.0],
            cov=[[0.5,-0.01], [-0.01,0.5]]
        ) +
        uniform.logpdf(
            [x[0],x[3],x[4]],
            loc=uniform_domain[:,0],
            scale=uniform_domain[:,1] - uniform_domain[:,0]
        ).sum() +
        multivariate_normal.logpdf(
            x[5:],
            mean=[-5.0,-4.0],
            cov=[[1,0.0], [0.0,1]]
        )
    )

    #log probability as a plain float for emcee
    return float(ll + lp)
sampler = emcee.EnsembleSampler(
    nwalkers=num_walkers, ndim=len(design_space)+2, log_prob_fn=log_prob
)

#burn-in phase
state = sampler.run_mcmc(p0, 100)
sampler.reset()
#actual MCMC sampling
sampler.run_mcmc(state, max_iter, progress=True)
samples = sampler.get_chain(flat=True)
fig = corner.corner(
    samples, quantiles=(0.16, 0.5, 0.84),
    levels=(1-np.exp(-1.0), 1-np.exp(-0.5)),
    show_titles=True, scale_hist=False,
    title_fmt=".3f",
    labels=[d['name'] for d in design_space] + ["log_c1", "log_c2"],
    truths=minimum
)
plt.savefig("corner_analytic.svg", transparent=True)

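# Optional comparison (added illustration, not part of the original script):
# the posterior medians from the surrogate-based and the analytic MCMC runs
# should agree closely if the surrogate models are well converged.
analytic_median = np.median(samples, axis=0)
surrogate_median = np.median(np.array(mcmc_result['samples']), axis=0)
print("difference of posterior medians (surrogate - analytic):")
print(surrogate_median - analytic_median)
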
Markov-Chain Monte-Carlo (MCMC) sampling of the probability density of the model parameters \(b_1,\dots,b_5\) and of the logarithms of the error model parameters \(c_1, c_2\), based on the analytic model function.
Markov-Chain Monte-Carlo (MCMC) sampling of the probability density of the model parameters \(b_1,\dots,b_5\) and of the logarithms of the error model parameters \(c_1, c_2\), based on the trained surrogate of the study. A comparison between the analytic and the surrogate results shows good quantitative agreement.