Bayesian parameter reconstruction using Bayesian optimization
- Driver:
- Download script:
The target of the study is to showcase the solution of a Bayesian parameter reconstruction problem based on a set of measurements \(\mathbf{t} \in \mathbb{R}^K\).
We assume that the measurement process can be accurately modeled by a function \(\mathbf{f}(\mathbf{b}) \in \mathbb{R}^K\). As an example we consider the MGH17 problem, which consists of fitting a vectorial model \(\mathbf{f}(\mathbf{b}) \in \mathbb{R}^{33}\) with
\[f_i(\mathbf{b}) = b_1 + b_2 e^{-(i-1)\,b_4} + b_3 e^{-(i-1)\,b_5}, \qquad i = 1,\dots,33,\]
to a measurement vector with \(K = 33\) entries.
Due to random measurement noise \(\mathbf{w} \sim \mathcal{N}(0, \mathrm{diag}[\eta_1^2,\dots,\eta_K^2])\), the vector of measurements \(\mathbf{t} = \mathbf{f}(\mathbf{b}) + \mathbf{w}\) is a random vector with probability density
\[P(\mathbf{t} | \mathbf{b}) = \prod_{i=1}^{K} \frac{1}{\sqrt{2\pi\eta_i^2}} \exp\!\left(-\frac{[t_i - f_i(\mathbf{b})]^2}{2\eta_i^2}\right).\]
The noise variances \(\eta_i^2\) shall be unknown and estimated from the measurement data. To this end we assume that the variance is composed of a background term \(c_1^2\) and a noise contribution that scales linearly with \(f_i(\mathbf{b})\), i.e.
\[\eta_i^2 = c_1^2 + [c_2 f_i(\mathbf{b})]^2.\]
Taking non-uniform prior distributions for the design parameter vector, \(P_\text{prior}(\mathbf{b})\), and for the error model parameters, \(P_\text{prior}(\mathbf{c})\), into account, the posterior distribution satisfies \(P(\mathbf{b}, \mathbf{c} | \mathbf{t}) \propto P(\mathbf{t} | \mathbf{b}, \mathbf{c}) P_\text{prior}(\mathbf{b}) P_\text{prior}(\mathbf{c})\).
Altogether, the target of finding the parameters with maximum posterior probability density is equivalent to minimizing the negative log-probability
\[-\log P(\mathbf{b}, \mathbf{c} | \mathbf{t}) = \frac{1}{2}\sum_{i=1}^{K}\left[\log\!\left(2\pi\eta_i^2\right) + \frac{[t_i - f_i(\mathbf{b})]^2}{\eta_i^2}\right] - \log P_\text{prior}(\mathbf{b}) - \log P_\text{prior}(\mathbf{c}) + \text{const}.\]
In the following, we show how this negative log-probability can be minimized using the BayesianReconstruction driver. We compare the results of the driver's MCMC sampling of the posterior probability to an MCMC sampling of the analytic value.
import sys, os
import numpy as np
import time
from scipy.stats import multivariate_normal, uniform
import pandas as pd
import torch
torch.set_default_dtype(torch.float64)
import matplotlib.pyplot as plt
import corner  # run "pip install corner" if not installed
import emcee  # run "pip install emcee" if not installed


jcm_optimizer_path = r"<JCM_OPTIMIZER_PATH>"
sys.path.insert(0, os.path.join(jcm_optimizer_path, "interface", "python"))
from jcmoptimizer import Server, Client, Study, Observation
server = Server()
client = Client(server.host)

# Definition of the search domain
design_space = [
    {'name': 'b1', 'type': 'continuous', 'domain': (0, 10)},
    {'name': 'b2', 'type': 'continuous', 'domain': (0.1, 4)},
    {'name': 'b3', 'type': 'continuous', 'domain': (-4, -0.1)},
    {'name': 'b4', 'type': 'continuous', 'domain': (0.05, 1)},
    {'name': 'b5', 'type': 'continuous', 'domain': (0.05, 1)}
]

# Creation of the study object with study_id 'bayesian_reconstruction'
study = client.create_study(
    design_space=design_space,
    driver="BayesianReconstruction",
    name="Bayesian parameter reconstruction using Bayesian optimization",
    study_id="bayesian_reconstruction"
)
# The vectorial model function of the MGH17 problem
def model(x: torch.Tensor) -> torch.Tensor:
    s = torch.arange(33)
    return x[0] + x[1]*torch.exp(-s*x[3]) + x[2]*torch.exp(-s*x[4])

# The forward model parameters b to be reconstructed
b_true = torch.tensor([3.7541005211E-01, 1.9358469127E+00, -1.4646871366E+00,
                       1.2867534640E-01, 2.2122699662E-01])

# The error model parameters in log-space to be reconstructed
log_c1, log_c2 = np.log(0.005), np.log(0.01)

# The error model, i.e. the noise stddev depending on the model value y = f(b)
def error_model(log_c1: float, log_c2: float, y: torch.Tensor) -> torch.Tensor:
    return torch.sqrt(np.exp(log_c1)**2 + (np.exp(log_c2)*y)**2)

# Generate a random target vector of measurements
torch.manual_seed(0)
model_vector = model(b_true)
err = error_model(log_c1, log_c2, model_vector)
measurement_vector = model_vector + err*torch.randn(model_vector.shape)
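# Optional sanity check (illustrative sketch, not part of the original script):
# visualize the noise-free model curve together with the noisy synthetic
# measurements. The file name "synthetic_measurements.svg" is only an assumption.
s_plot = np.arange(33)
plt.errorbar(s_plot, measurement_vector.numpy(), yerr=err.numpy(), fmt='o', ms=3,
             label='noisy measurements')
plt.plot(s_plot, model_vector.numpy(), label='noise-free model f(b_true)')
plt.xlabel('index i')
plt.ylabel('f_i')
plt.legend()
plt.savefig("synthetic_measurements.svg", transparent=True)
plt.close()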

study.configure(
    max_iter=80,
    target_vector=measurement_vector.tolist(),
    error_model=dict(
        # error model expression
        expression='sqrt(exp(log_c1)^2 + (exp(log_c2)*y_model)^2)',
        # prior distribution of error model parameters
        distributions=[
            {'type': 'normal', 'parameter': 'log_c1', 'mean': -5.0, 'stddev': 1},
            {'type': 'normal', 'parameter': 'log_c2', 'mean': -4.0, 'stddev': 1},
        ],
        # initial values and parameter bounds for fitting the error model parameters
        initial_parameters=[-5.0, -4.0],
        parameter_bounds=[(-7, -2), (-6.0, -1)]
    ),
    # Multivariate normal prior distribution of forward model parameters.
    # Unspecified parameters (b1, b4, b5) are uniformly distributed in the design space
    parameter_distribution=dict(
        distributions=[
            dict(type='mvn', parameters=['b2', 'b3'], mean=[2.25, -2.0],
                 covariance=[[0.5, -0.01], [-0.01, 0.5]]),
        ]
    )
)
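# Cross-check (illustrative, not part of the original script): assuming that '^'
# denotes exponentiation in the expression syntax above, the configured error-model
# expression encodes the same function as the Python helper error_model().
expr_check = np.sqrt(np.exp(-5.0)**2 + (np.exp(-4.0)*model_vector.numpy())**2)
assert np.allclose(expr_check, error_model(-5.0, -4.0, model_vector).numpy())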
# Evaluation of the black-box function for specified design parameters
def evaluate(study: Study, b1: float, b2: float, b3: float, b4: float, b5: float) -> Observation:

    observation = study.new_observation()
    # tensor of design values to reconstruct
    x = torch.tensor([b1, b2, b3, b4, b5])
    observation.add(model(x).tolist())

    return observation

# Run the minimization
study.set_evaluator(evaluate)
study.run()
# best sample of forward model parameters b
best_b_sample = study.driver.best_sample
# minimum of negative log-probability
min_neg_log_prob = study.driver.min_objective
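# Illustrative inspection (not part of the original script); the printed values
# depend on the random seed and the course of the optimization.
print("Best forward-model parameters:", best_b_sample)
print("True forward-model parameters:", b_true.tolist())
print("Minimum negative log-probability:", min_neg_log_prob)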

# determine sample [b1, b2, b3, b4, b5, log_c1, log_c2] that minimizes the negative
# log-probability
minimum = list(best_b_sample.values())

# path to negative log-probability variable
path = "driver.acquisition_function.main_objective.variable"
neg_log_probs = study.historic_parameter_values(f"{path}.observed_value")
idx = np.argmin(neg_log_probs)

logs_c1 = study.historic_parameter_values(f"{path}.error_model_parameters.log_c1")
logs_c2 = study.historic_parameter_values(f"{path}.error_model_parameters.log_c2")

minimum += [logs_c1[idx], logs_c2[idx]]
minimum = np.array(minimum)


# Before running a Markov-chain Monte-Carlo (MCMC) sampling we converge the surrogate
# models by sampling around the minimum. To make the study more explorative, the
# scaling parameter is increased and the effective number of degrees of freedom is set to one.
study.configure(
    scaling=10.0,
    effective_DOF=1.0,
    min_uncertainty=1e-8*np.abs(min_neg_log_prob),
    max_iter=120,
)
study.run()

# Run the MCMC sampling with 32 walkers
num_walkers, max_iter = 32, 20000
mcmc_result = study.driver.run_mcmc(
    rel_error=0.01,
    num_walkers=num_walkers,
    max_iter=max_iter
)
fig = corner.corner(
    np.array(mcmc_result['samples']),
    quantiles=(0.16, 0.5, 0.84),
    levels=(1-np.exp(-1.0), 1-np.exp(-0.5)),
    show_titles=True, scale_hist=False,
    title_fmt=".3f",
    labels=[d['name'] for d in design_space] + ["log_c1", "log_c2"],
    truths=minimum
)
plt.savefig("corner_surrogate.svg", transparent=True)
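# Optional summary (illustrative, not part of the original script): median and a
# 68% credible interval per parameter from the surrogate-based MCMC samples.
surrogate_samples = np.array(mcmc_result['samples'])
param_names = [d['name'] for d in design_space] + ["log_c1", "log_c2"]
for i, name in enumerate(param_names):
    q16, q50, q84 = np.percentile(surrogate_samples[:, i], [16, 50, 84])
    print(f"{name}: {q50:.3f} (+{q84-q50:.3f} / -{q50-q16:.3f})")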


# As a comparison, we run the MCMC sampling directly on the analytic model.
p0 = 0.05*np.random.randn(num_walkers, len(design_space)+2)
p0 += minimum

# Uniform prior domain for the model parameters b1, b4, b5
uniform_domain = np.array([design_space[i]["domain"] for i in [0, 3, 4]])

# log probability function
def log_prob(x: np.ndarray) -> float:

    y = model(x[:5])
    res = y - measurement_vector
    err = error_model(x[5], x[6], y)

    # log-likelihood
    ll = -0.5*(
        torch.log(2*torch.tensor(torch.pi)) +
        torch.log(err**2) +
        (res/err)**2
    ).sum()

    # log prior
    lp = (
        multivariate_normal.logpdf(
            x[1:3],
            mean=[2.25, -2.0],
            cov=[[0.5, -0.01], [-0.01, 0.5]]
        ) +
        uniform.logpdf(
            [x[0], x[3], x[4]],
            loc=uniform_domain[:, 0],
            scale=uniform_domain[:, 1] - uniform_domain[:, 0]
        ).sum() +
        multivariate_normal.logpdf(
            x[5:],
            mean=[-5.0, -4.0],
            cov=[[1, 0.0], [0.0, 1]]
        )
    )

    # log probability, returned as a plain float (ll is a torch scalar tensor)
    return (ll + lp).item()
sampler = emcee.EnsembleSampler(
    nwalkers=num_walkers, ndim=len(design_space)+2, log_prob_fn=log_prob
)

# burn-in phase
state = sampler.run_mcmc(p0, 100)
sampler.reset()
# actual MCMC sampling
sampler.run_mcmc(state, max_iter, progress=True)
samples = sampler.get_chain(flat=True)
fig = corner.corner(
    samples, quantiles=(0.16, 0.5, 0.84),
    levels=(1-np.exp(-1.0), 1-np.exp(-0.5)),
    show_titles=True, scale_hist=False,
    title_fmt=".3f",
    labels=[d['name'] for d in design_space] + ["log_c1", "log_c2"],
    truths=minimum
)
plt.savefig("corner_analytic.svg", transparent=True)

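# Optional quantitative comparison (illustrative, not part of the original script):
# compare the per-parameter medians of the surrogate-based and the analytic MCMC samples.
analytic_medians = np.median(samples, axis=0)
surrogate_medians = np.median(np.array(mcmc_result['samples']), axis=0)
for name, m_sur, m_ana in zip(
        [d['name'] for d in design_space] + ["log_c1", "log_c2"],
        surrogate_medians, analytic_medians):
    print(f"{name}: surrogate median = {m_sur:.3f}, analytic median = {m_ana:.3f}")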
Markov-chain Monte-Carlo (MCMC) sampling of the probability density of the model parameters \(b_1,\dots,b_5\) and of the log values of the error model parameters \(c_1, c_2\), based on the analytic model function.
Markov-chain Monte-Carlo (MCMC) sampling of the probability density of the model parameters \(b_1,\dots,b_5\) and of the log values of the error model parameters \(c_1, c_2\), based on the trained surrogate of the study. A comparison between the analytic and the surrogate model function shows good quantitative agreement.