Active learning of a global surrogate model
- Driver: ActiveLearning
- Download script:
The goal of this study is to train a surrogate model of a vector-valued function by means of an active training loop: in each iteration, new training data is generated by evaluating the function at the point of maximal prediction uncertainty.
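The following toy sketch illustrates this idea outside of any specific framework. It uses plain NumPy, and the distance to the nearest training point stands in for the prediction uncertainty (the study below instead uses the uncertainty of a Bayesian neural network ensemble):

# Toy sketch of an active training loop (plain NumPy, not the jcmoptimizer
# API). The distance to the closest training point serves as a crude proxy
# for the surrogate's prediction uncertainty.
import numpy as np

def expensive_function(x):  # stand-in for the black-box model function
    return np.sin(3 * x)

X = [0.0, 2.0]  # initial training points
Y = [expensive_function(x) for x in X]
candidates = np.linspace(0.0, 2.0, 201)

for _ in range(8):
    # "Uncertainty" proxy: distance to the closest training point
    dist = np.min(np.abs(candidates[:, None] - np.array(X)[None, :]), axis=1)
    x_next = candidates[np.argmax(dist)]  # most uncertain candidate
    X.append(x_next)
    Y.append(expensive_function(x_next))  # new training datum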
As an example problem we consider the frequency-dependent transmission function of a Fabry-Pérot etalon (angular frequency \(\omega\)). The etalon consists of a resonator of length \(l\) formed by a pair of mirrors with reflectivities \(R_1\) and \(R_2\). The propagation losses inside the resonator are quantified by the intensity-loss coefficient \(\alpha = 0.05\).
The transmission function is given by (see also the Wikipedia entry on the Fabry-Pérot etalon)
\[
A_\text{trans}(\omega, R_1, R_2, l) = \frac{(1 - R_1)(1 - R_2)\, e^{-\alpha l}}{\left(1 - \sqrt{R_1 R_2}\, e^{-\alpha l}\right)^{2} + 4 \sqrt{R_1 R_2}\, e^{-\alpha l}\, \sin^{2}\!\big(\phi(\omega, l)\big)},
\]
where \(\phi(\omega, l) = \omega \cdot l\) is the round-trip phase shift of the light field inside the resonator.
We consider the case that we wish to learn the vector-valued mapping from the etalon parameters \((R_1, R_2, l)\) to transmission spectra
\[
(R_1, R_2, l) \mapsto \big( A_\text{trans}(\omega_1, R_1, R_2, l), \dots, A_\text{trans}(\omega_{50}, R_1, R_2, l) \big),
\]
where the \(\omega_k\) are 50 equally spaced angular frequencies in \([0, 2\pi]\).
It is possible to learn this vector-valued mapping with a multi-output Gaussian process or a multi-output Bayesian neural network. Here, we instead learn the four-dimensional scalar function \(A_\text{trans}(\omega, R_1, R_2, l)\) using a single-output Bayesian neural network. This has the advantage that the vector entries are not learned independently; correlations between similar frequencies are taken into account. Moreover, after training one can obtain predictions for arbitrarily fine \(\omega\) scans.
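To make the dimensional bookkeeping concrete, here is a minimal, self-contained NumPy sketch (independent of the listing below; the parameter values are arbitrary): a spectrum vector is nothing but the 4D scalar function scanned over the fixed \(\omega\) grid, and a finer scan is obtained by simply evaluating on a denser grid.

# Minimal NumPy sketch (independent of the listing below; parameter values
# are arbitrary): the spectrum vector is the 4D scalar function scanned
# over a fixed omega grid.
import numpy as np

def A_trans(omega, R1, R2, l, alpha=0.05):
    loss = np.exp(-alpha * l)
    R = np.sqrt(R1 * R2)
    return (1 - R1) * (1 - R2) * loss / (
        (1 - R * loss) ** 2 + 4 * R * loss * np.sin(omega * l) ** 2
    )

omegas = np.linspace(0, 2 * np.pi, 50)     # fixed scan -> 50-dim vector
spectrum = A_trans(omegas, 0.3, 0.5, 0.8)  # one spectrum sample
fine = A_trans(np.linspace(0, 2 * np.pi, 500), 0.3, 0.5, 0.8)  # finer scan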
import sys, os
import numpy as np
import time
import matplotlib.pyplot as plt

jcm_optimizer_path = r"<JCM_OPTIMIZER_PATH>"
sys.path.insert(0, os.path.join(jcm_optimizer_path, "interface", "python"))
from jcmoptimizer import Server, Client, Study, Observation
server = Server()
client = Client(server.host)

def Atrans(
    R1: float, R2: float, l: float, alpha: float, omega: float
) -> np.ndarray:
    """Transmission through the etalon

    Args:
        R1: Reflectivity of the first mirror
        R2: Reflectivity of the second mirror
        l: Resonator length
        alpha: Intensity-loss coefficient
        omega: Angular frequency of the light
    """
    loss = np.exp(-alpha*l)
    R = np.sqrt(R1*R2)

    out = (1 - R1)*(1 - R2)*loss
    out /= (1 - R*loss)**2 + 4*R*loss*np.sin(omega*l)**2
    return out

# Definition of the parameter domain
design_space = [
    {'name': 'R1', 'type': 'continuous', 'domain': (0.1, 0.7)},
    {'name': 'R2', 'type': 'continuous', 'domain': (0.1, 0.7)},
    {'name': 'l', 'type': 'continuous', 'domain': (0.5, 1.0)},
]

# Definition of the fixed environment variable alpha and the
# scan variable omega
environment = [
    {'name': 'alpha', 'type': 'fixed', 'domain': 0.05},
    {'name': 'omega', 'type': 'variable', 'domain': (0, 2*np.pi)},
]

# The omega scan defining the transmission spectra
omegas = np.linspace(0, 2*np.pi, 50)

# Creation of the study object with study_id 'active_surrogate_training'
study = client.create_study(
    design_space=design_space,
    environment=environment,
    driver="ActiveLearning",
    name="Active learning of a global surrogate model",
    study_id="active_surrogate_training"
)

study.configure(
    max_iter=50,
    surrogates=[
        # We use a neural network with 4 hidden layers of 200 neurons each
        # to learn the scalar function Atrans(R1, R2, l, omega)
        dict(
            type="NN", name="Atrans", output_dim=1,
            hidden_layers_arch=[200, 200, 200, 200],
            num_NNs=60,
            trainer=dict(
                type="full_data_trainer",
                num_epochs=1000,
                num_expel_NNs=30
            )
        )
    ],
    variables=[
        # This variable defines a scan of the surrogate prediction over
        # all omega values
        dict(
            type="Scan",
            name="omega_scan",
            input_surrogate="Atrans",
            output_dim=len(omegas),
            scan_parameters=["omega"],
            scan_values=omegas[:, None].tolist(),
        ),
        # This variable defines the average transmission of the omega scan
        dict(type="LinearCombination", name="average", inputs=["omega_scan"]),
    ],
    objectives=[
        # The objective is to evaluate the model function at the point of
        # maximal uncertainty of the average transmission.
        dict(
            type="Explorer",
            name="objective",
            variable="average",
        )
    ]
)

# Evaluation of the black-box function for specified design parameters
def evaluate(study: Study, R1: float, R2: float, l: float, alpha: float) -> Observation:
    time.sleep(2)  # make the objective expensive
    observation = study.new_observation()
    for omega in omegas:
        observation.add(
            Atrans(R1, R2, l, alpha, omega),
            environment_value=[omega],
            model_name="Atrans",
        )
    return observation

# Run the training loop
study.set_evaluator(evaluate)
study.run()


study.configure(
    surrogates=[
        # For more accurate predictions, we retrain the network on
        # all data for 1500 epochs.
        dict(
            type="NN", name="Atrans", output_dim=1,
            hidden_layers_arch=[200, 200, 200, 200],
            num_NNs=60,
            trainer=dict(
                type="full_data_trainer",
                num_epochs=1500,
                num_expel_NNs=30
            )
        ),
    ],
)

# Get predictions and analytic values on a finer resolved omega scan
omegas_fine = np.linspace(0, 2 * np.pi, 150)

# To test the worst-case prediction, we get a suggestion corresponding to
# a sample with largest uncertainty
s = study.get_suggestion()
study.clear_suggestion(s.id)

plt.figure(figsize=(10, 5))
for R1, R2, l in [
    (s.kwargs["R1"], s.kwargs["R2"], s.kwargs["l"]),
    (0.1, 0.1, 0.5),
    (0.1, 0.7, 0.75),
    (0.7, 0.7, 1.0),
]:
    prediction = study.driver.predict(
        points=[[R1, R2, l, omega] for omega in omegas_fine],
        object_type="surrogate",
        name="Atrans",
    )
    mean = np.array(prediction["mean"]).squeeze()
    std = np.sqrt(np.array(prediction["variance"])).squeeze()
    p = plt.plot(omegas_fine, mean)
    plt.fill_between(
        omegas_fine, mean - std, mean + std, alpha=0.2, color=p[0].get_color(),
    )
    plt.plot(
        omegas_fine,
        [Atrans(R1, R2, l, 0.05, omega) for omega in omegas_fine],
        "--",
        color=p[0].get_color(),
    )

plt.xlabel("Angular frequency")
plt.ylabel("Transmission")
plt.grid()
plt.savefig("etalon_predictions.svg", transparent=True)
The figure shows, for different parameters \(R_1, R_2\), and \(l\), the predicted transmission function (solid lines; the shading indicates the uncertainty of the prediction) in comparison to the analytical transmission values (dashed lines). The blue line corresponds to the prediction with the largest average uncertainty. The other lines correspond to the etalon parameters \(R_1 = 0.1, R_2 = 0.1, l = 0.5\) (orange), \(R_1 = 0.1, R_2 = 0.7, l = 0.75\) (green), and \(R_1 = 0.7, R_2 = 0.7, l = 1.0\) (red). Considering the small number of only 50 data points, the agreement between the predictions and the analytical values is very good.
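To quantify this agreement beyond visual inspection, one could, for example, compute a root-mean-square error over randomly drawn test parameters. The following sketch reuses only names and calls that already appear in the script above (Atrans, omegas_fine, study.driver.predict); the number of test samples is an arbitrary choice.

# Sketch of a quantitative accuracy check, reusing Atrans, omegas_fine, and
# study.driver.predict from the script above; 20 test samples are arbitrary.
rng = np.random.default_rng(1)
errors = []
for _ in range(20):
    R1, R2 = rng.uniform(0.1, 0.7, size=2)  # draw from the design space
    l = rng.uniform(0.5, 1.0)
    prediction = study.driver.predict(
        points=[[R1, R2, l, omega] for omega in omegas_fine],
        object_type="surrogate",
        name="Atrans",
    )
    mean = np.array(prediction["mean"]).squeeze()
    exact = np.array([Atrans(R1, R2, l, 0.05, omega) for omega in omegas_fine])
    errors.append(np.sqrt(np.mean((mean - exact) ** 2)))
print(f"Mean RMSE over the test set: {np.mean(errors):.4f}")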