Bayesian parameter reconstruction using Bayesian optimization
- Driver: BayesianReconstruction
- Download script:
The goal of this example is to showcase the solution of a Bayesian parameter reconstruction problem based on a set of measurements \(\mathbf{t} \in \mathbb{R}^K\).
We assume that the measurement process can be accurately modeled by a function \(\mathbf{f}(\mathbf{b}) \in \mathbb{R}^K\) of a parameter vector \(\mathbf{b}\). As an example we consider the MGH17 problem, which consists of fitting a vectorial model \(\mathbf{f}(\mathbf{b}) \in \mathbb{R}^{33}\) with entries
\[ f_i(\mathbf{b}) = b_1 + b_2 e^{-s_i b_4} + b_3 e^{-s_i b_5}, \qquad s_i = i - 1,\ i = 1,\dots,33, \]
to a measurement vector with \(K = 33\) entries.
Due to random measurement noise \(\mathbf{w} \sim \mathcal{N}(0, \mathrm{diag}[\eta_1^2,\dots,\eta_K^2])\), the vector of measurements \(\mathbf{t} = \mathbf{f}(\mathbf{b}) + \mathbf{w}\) is a random vector with probability density
\[ P(\mathbf{t} \,|\, \mathbf{b}) = \prod_{i=1}^{K} \frac{1}{\sqrt{2\pi \eta_i^2}} \exp\!\left(-\frac{[t_i - f_i(\mathbf{b})]^2}{2\eta_i^2}\right). \]
The noise variances \(\eta_i^2\) are unknown and have to be estimated from the measurement data. To this end, we assume that the variance is composed of a background term \(c_1^2\) and a noise contribution that scales linearly with \(f_i(\mathbf{b})\), i.e.
\[ \eta_i^2 = c_1^2 + c_2^2 f_i(\mathbf{b})^2. \]
Taking non-uniform prior distributions for the design parameter vector, \(P_\text{prior}(\mathbf{b})\), and for the error model parameters, \(P_\text{prior}(\mathbf{c})\), into account, the posterior distribution is
\(P(\mathbf{b}, \mathbf{c} \,|\, \mathbf{t}) \propto P(\mathbf{t} \,|\, \mathbf{b}, \mathbf{c})\, P_\text{prior}(\mathbf{b})\, P_\text{prior}(\mathbf{c})\).
Altogether, finding the parameters with maximum posterior probability density is equivalent to minimizing the negative log-probability
\[ -\log P(\mathbf{b}, \mathbf{c} \,|\, \mathbf{t}) = \frac{1}{2} \sum_{i=1}^{K} \left[\log\!\left(2\pi\eta_i^2\right) + \frac{[t_i - f_i(\mathbf{b})]^2}{\eta_i^2}\right] - \log P_\text{prior}(\mathbf{b}) - \log P_\text{prior}(\mathbf{c}) + \text{const}. \]
In the following, we show how this negative log-probability can be minimized using the BayesianReconstruction driver. We then compare the results of the driver's MCMC sampling of the posterior probability, based on the trained surrogate, to an MCMC sampling of the analytic posterior.
import sys, os
import numpy as np
import time
from scipy.stats import multivariate_normal, uniform
import pandas as pd
import torch
torch.set_default_dtype(torch.float64)
import matplotlib.pyplot as plt
import corner  # run "pip install corner" if not installed
import emcee   # run "pip install emcee" if not installed


from jcmoptimizer import Client, Study, Observation
client = Client()

# Definition of the search domain
design_space = [
    {'name': 'b1', 'type': 'continuous', 'domain': (0, 10)},
    {'name': 'b2', 'type': 'continuous', 'domain': (0.1, 4)},
    {'name': 'b3', 'type': 'continuous', 'domain': (-4, -0.1)},
    {'name': 'b4', 'type': 'continuous', 'domain': (0.05, 1)},
    {'name': 'b5', 'type': 'continuous', 'domain': (0.05, 1)}
]

# Creation of the study object with study_id 'bayesian_reconstruction'
study = client.create_study(
    design_space=design_space,
    driver="BayesianReconstruction",
    study_name="Bayesian parameter reconstruction using Bayesian optimization",
    study_id="bayesian_reconstruction"
)
# The vectorial model function of the MGH17 problem
def model(x: torch.Tensor) -> torch.Tensor:
    s = torch.arange(33)
    return x[0] + x[1]*torch.exp(-s*x[3]) + x[2]*torch.exp(-s*x[4])

# The forward model parameters b to be reconstructed
b_true = torch.tensor([3.7541005211E-01, 1.9358469127E+00, -1.4646871366E+00,
                       1.2867534640E-01, 2.2122699662E-01])

# The error model parameters in log-space to be reconstructed
log_c1, log_c2 = np.log(0.005), np.log(0.01)

# The error model, i.e. the noise stddev depending on the model value y=f(b)
def error_model(log_c1: float, log_c2: float, y: torch.Tensor) -> torch.Tensor:
    return torch.sqrt(np.exp(log_c1)**2 + (np.exp(log_c2)*y)**2)

# Generate a random target vector of measurements
torch.manual_seed(0)
model_vector = model(b_true)
err = error_model(log_c1, log_c2, model_vector)
measurement_vector = model_vector + err*torch.randn(model_vector.shape)

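# Optional sanity check (an addition to the original recipe): the synthetic
# measurement should deviate from the noise-free model values only on the order
# of the noise standard deviation.
print("max |t - f(b_true)|:", float((measurement_vector - model_vector).abs().max()))
print("max noise stddev:", float(err.max()))
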
study.configure(
    max_iter=80,
    target_vector=measurement_vector.tolist(),
    error_model=dict(
        # error model expression
        expression='sqrt(exp(log_c1)^2 + (exp(log_c2)*y_model)^2)',
        # prior distribution of error model parameters
        distributions=[
            {'type': 'normal', 'parameter': 'log_c1', 'mean': -5.0, 'stddev': 1},
            {'type': 'normal', 'parameter': 'log_c2', 'mean': -4.0, 'stddev': 1},
        ],
        # initial values and parameter bounds for fitting the error model parameters
        initial_parameters=[-5.0, -4.0],
        parameter_bounds=[(-7, -2), (-6.0, -1)]
    ),
    # Multivariate normal prior distribution of the forward model parameters.
    # Unspecified parameters (b1, b4, b5) are uniformly distributed in the design space.
    parameter_distribution=dict(
        distributions=[
            dict(type='mvn', parameters=['b2', 'b3'], mean=[2.25, -2.0],
                 covariance=[[0.5, -0.01], [-0.01, 0.5]]),
        ]
    )
)
# Evaluation of the black-box function for specified design parameters
def evaluate(study: Study, b1: float, b2: float, b3: float, b4: float, b5: float) -> Observation:

    observation = study.new_observation()
    # tensor of design values to reconstruct
    x = torch.tensor([b1, b2, b3, b4, b5])
    observation.add(model(x).tolist())

    return observation

# Run the minimization
study.set_evaluator(evaluate)
study.run()
# best sample of forward model parameters b
best_b_sample = study.driver.best_sample
# minimum of the negative log-probability
min_neg_log_prob = study.driver.min_objective

# determine the sample [b1, b2, b3, b4, b5, log_c1, log_c2] that minimizes the
# negative log-probability
minimum = list(best_b_sample.values())

# path to the negative log-probability variable
path = "driver.acquisition_function.main_objective.variable"
neg_log_probs = study.historic_parameter_values(f"{path}.observed_value")
idx = np.argmin(neg_log_probs)

logs_c1 = study.historic_parameter_values(f"{path}.error_model_parameters.log_c1")
logs_c2 = study.historic_parameter_values(f"{path}.error_model_parameters.log_c2")

minimum += [logs_c1[idx], logs_c2[idx]]
minimum = np.array(minimum)

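# Optional check (this assumes the ordering [b1, ..., b5, log_c1, log_c2] used
# above): compare the reconstructed maximum a-posteriori values with the ground
# truth that generated the synthetic measurement.
ground_truth = np.concatenate([b_true.numpy(), [log_c1, log_c2]])
print("reconstructed:", np.round(minimum, 4))
print("ground truth:", np.round(ground_truth, 4))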

# Before running a Markov-chain Monte-Carlo (MCMC) sampling we converge the surrogate
# models by sampling around the minimum. To make the study more explorative, the
# scaling parameter is increased and the number of effective degrees of freedom is
# set to one.
study.configure(
    scaling=10.0,
    effective_DOF=1.0,
    min_uncertainty=1e-8*np.abs(min_neg_log_prob),
    max_iter=120,
)
study.run()

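# Optional: the additional, more explorative iterations should not worsen the
# best observed negative log-probability.
print("min. negative log-probability after refinement:", study.driver.min_objective)
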
# Run the MCMC sampling with 32 walkers
num_walkers, max_iter = 32, 20000
mcmc_result = study.driver.run_mcmc(
    rel_error=0.01,
    num_walkers=num_walkers,
    max_iter=max_iter
)
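# Optional summary: posterior medians of the surrogate-based MCMC chain (the
# 'samples' entry of mcmc_result is also used for the corner plot below).
medians = np.median(np.array(mcmc_result['samples']), axis=0)
print("posterior medians [b1..b5, log_c1, log_c2]:", np.round(medians, 4))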
fig = corner.corner(
    np.array(mcmc_result['samples']),
    quantiles=(0.16, 0.5, 0.84),
    levels=(1-np.exp(-1.0), 1-np.exp(-0.5)),
    show_titles=True, scale_hist=False,
    title_fmt=".3f",
    labels=[d['name'] for d in design_space] + ["log_c1", "log_c2"],
    truths=minimum
)
plt.savefig("corner_surrogate.svg", transparent=True)


# As a comparison, we run the MCMC sampling directly on the analytic model.
p0 = 0.05*np.random.randn(num_walkers, len(design_space)+2)
p0 += minimum

# Uniform prior domain for the model parameters b1, b4, b5
uniform_domain = np.array([design_space[idx]["domain"] for idx in [0, 3, 4]])

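# Optional guard (an addition to the original recipe): keep the initial positions
# of b1, b4, b5 inside the uniform prior support, otherwise single walkers may
# start with a log-probability of -inf.
for j, col in enumerate([0, 3, 4]):
    p0[:, col] = np.clip(p0[:, col], uniform_domain[j, 0], uniform_domain[j, 1])
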
# log probability function of the analytic model
def log_prob(x: np.ndarray) -> float:

    y = model(x[:5])
    res = y - measurement_vector
    err = error_model(x[5], x[6], y)

    # log-likelihood
    ll = -0.5*(
        np.log(2*np.pi) +
        torch.log(err**2) +
        (res/err)**2
    ).sum()

    # log prior
    lp = (
        multivariate_normal.logpdf(
            x[1:3],
            mean=[2.25, -2.0],
            cov=[[0.5, -0.01], [-0.01, 0.5]]
        ) +
        uniform.logpdf(
            [x[0], x[3], x[4]],
            loc=uniform_domain[:, 0],
            scale=uniform_domain[:, 1] - uniform_domain[:, 0]
        ).sum() +
        multivariate_normal.logpdf(
            x[5:],
            mean=[-5.0, -4.0],
            cov=[[1, 0.0], [0.0, 1]]
        )
    )

    # log probability (returned as a plain float for emcee)
    return float(ll + lp)
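
# Optional sanity check: the analytic log-probability should be finite at the
# reconstructed minimum. (Whether it equals -min_neg_log_prob exactly depends on
# constant terms in the driver's objective, so only finiteness is checked here.)
assert np.isfinite(log_prob(minimum))
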
sampler = emcee.EnsembleSampler(
    nwalkers=num_walkers, ndim=len(design_space)+2, log_prob_fn=log_prob
)

# burn-in phase
state = sampler.run_mcmc(p0, 100)
sampler.reset()
# actual MCMC sampling
sampler.run_mcmc(state, max_iter, progress=True)
samples = sampler.get_chain(flat=True)
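# Optional convergence diagnostic: estimate the integrated autocorrelation time;
# emcee raises an AutocorrError if the chain is too short for a reliable estimate.
try:
    print("autocorrelation times:", sampler.get_autocorr_time())
except emcee.autocorr.AutocorrError as exc:
    print("chain too short for a reliable autocorrelation estimate:", exc)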
fig = corner.corner(
    samples, quantiles=(0.16, 0.5, 0.84),
    levels=(1-np.exp(-1.0), 1-np.exp(-0.5)),
    show_titles=True, scale_hist=False,
    title_fmt=".3f",
    labels=[d['name'] for d in design_space] + ["log_c1", "log_c2"],
    truths=minimum
)
plt.savefig("corner_analytic.svg", transparent=True)


client.shutdown_server()
Markov-chain Monte-Carlo (MCMC) sampling of the probability density of the model parameters \(b_1,\dots,b_5\) and of the logarithms of the error model parameters \(c_1, c_2\), based on the analytic model function.
Markov-chain Monte-Carlo (MCMC) sampling of the probability density of the model parameters \(b_1,\dots,b_5\) and of the logarithms of the error model parameters \(c_1, c_2\), based on the trained surrogate of the study. A comparison between the analytic and the surrogate results shows good quantitative agreement.