Active learning of a global surrogate model

Driver: ActiveLearning

Download script: active_surrogate_training.py

The goal of this study is to train a surrogate model of a vectorial function using an active training loop: in each iteration, new training data is generated by evaluating the vectorial function at the point of maximal prediction uncertainty.
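
In this example, the driver handles this loop internally. The following self-contained sketch merely illustrates the principle on a cheap one-dimensional toy function, using an ensemble of bootstrapped polynomial fits as a stand-in surrogate whose spread estimates the prediction uncertainty (all names here are hypothetical and not part of the jcmoptimizer API):

import numpy as np

rng = np.random.default_rng(0)
f = lambda x: np.sin(3*x)*np.exp(-x)  # cheap stand-in for the expensive function
candidates = np.linspace(0.0, 2.0, 200)

# Small initial design
X = list(rng.uniform(0.0, 2.0, 8))
Y = [f(x) for x in X]
for _ in range(10):
    # Stand-in surrogate: ensemble of cubic fits on bootstrap resamples
    preds = []
    for _ in range(20):
        idx = rng.integers(0, len(X), len(X))
        coeffs = np.polyfit(np.array(X)[idx], np.array(Y)[idx], deg=3)
        preds.append(np.polyval(coeffs, candidates))
    std = np.std(preds, axis=0)          # ensemble spread = prediction uncertainty
    x_next = candidates[np.argmax(std)]  # point of maximal uncertainty
    X.append(x_next)                     # evaluate the function there ...
    Y.append(f(x_next))                  # ... and extend the training data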

As an example problem, we consider the frequency-dependent transmission of a Fabry-Pérot etalon as a function of the angular frequency \(\omega\). The etalon consists of a resonator of length \(l\) formed by a pair of mirrors with reflectivities \(R_1\) and \(R_2\). The propagation losses inside the resonator are quantified by the intensity-loss coefficient \(\alpha = 0.05\).

The transmission function is given by (see also the Wikipedia entry on the Fabry-Pérot etalon)

\[A_\text{trans}(\omega, R_1, R_2, l) = \frac {(1-R_{1})(1-R_{2})e^{-\alpha l }}{\left({1-{\sqrt {R_{1}R_{2}}}e^{-\alpha l }}\right)^{2}+4{\sqrt {R_{1}R_{2}}}e^{-\alpha l }\sin^{2}(\phi )}.\]

The round-trip phase shift of the light field inside the resonator is \(\phi(\omega, l) = \omega \cdot l\).
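
As a quick sanity check of the formula (a standalone sketch; the helper name transmission is ours, the tutorial script below defines its own Atrans), the transmission is maximal at the resonances \(\phi = k\pi\), where the \(\sin^2\) term vanishes:

import numpy as np

def transmission(omega, R1, R2, l, alpha=0.05):
    loss = np.exp(-alpha*l)  # intensity loss factor exp(-alpha*l)
    R = np.sqrt(R1*R2)
    phi = omega*l            # round-trip phase shift
    return (1 - R1)*(1 - R2)*loss / ((1 - R*loss)**2 + 4*R*loss*np.sin(phi)**2)

l = 0.8
omega = np.linspace(0, 2*np.pi, 1001)
T = transmission(omega, R1=0.5, R2=0.5, l=l)
# The global maximum of the scan equals the value at the resonance omega = pi/l
print(T.max(), transmission(np.pi/l, 0.5, 0.5, l))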

We wish to learn the vectorial mapping from the etalon parameters \((R_1, R_2, l)\) to transmission spectra

\[\begin{split}\mathbf{f}(R_1, R_2, l) = \begin{bmatrix} A_\text{trans}(\omega_1, R_1, R_2, l) \\ A_\text{trans}(\omega_2, R_1, R_2, l) \\ \vdots \\ A_\text{trans}(\omega_{50}, R_1, R_2, l) \end{bmatrix}\end{split}\]

with \(\omega_k = 2\pi (k-1)/49\) for \(k = 1, \dots, 50\), i.e. 50 equidistant angular frequencies covering \([0, 2\pi]\).
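
Continuing the transmission helper from the sketch above, the vector \(\mathbf{f}\) is simply one broadcasted call of the scalar function over the frequency grid:

omegas = np.linspace(0, 2*np.pi, 50)                  # the 50 equidistant frequencies
f_vec = transmission(omegas, R1=0.5, R2=0.5, l=0.8)   # shape (50,)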

It is possible to learn this vectorial mapping using a multi-output Gaussian process or a multi-output Bayesian neural network. Here, we instead learn the four-dimensional scalar function \(A_\text{trans}(\omega, R_1, R_2, l)\) using a single-output Bayesian neural network. This has the advantage that the vector entries are not learned independently; correlations between similar frequencies are taken into account. Moreover, after training, one can obtain predictions for arbitrarily fine \(\omega\) scans.
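
To illustrate the resulting training-data layout (a sketch continuing the helper above; the array names are ours), each evaluation of one design point \((R_1, R_2, l)\) contributes 50 scalar training samples for the four-dimensional function, with \(\omega\) entering as a regular input that the network can interpolate over:

R1, R2, l = 0.5, 0.5, 0.8
X = np.column_stack([
    np.full_like(omegas, R1),
    np.full_like(omegas, R2),
    np.full_like(omegas, l),
    omegas,
])                                    # shape (50, 4): inputs (R1, R2, l, omega_k)
y = transmission(omegas, R1, R2, l)   # shape (50,): scalar targets A_trans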

import sys, os
import numpy as np
import time
import matplotlib.pyplot as plt

jcm_optimizer_path = r"<JCM_OPTIMIZER_PATH>"
sys.path.insert(0, os.path.join(jcm_optimizer_path, "interface", "python"))
from jcmoptimizer import Server, Client, Study, Observation
server = Server()
client = Client(server.host)

def Atrans(
        R1: float, R2: float, l: float, alpha: float, omega: float
) -> float:
    """Transmission through the etalon
    Args:
       R1: Reflectivity of first mirror
       R2: Reflectivity of second mirror
       l: Resonator length
       alpha: Intensity loss coefficient
       omega: Angular frequency of light
    """
    loss = np.exp(-alpha*l)
    R = np.sqrt(R1*R2)

    out = (1 - R1)*(1 - R2)*loss
    out /= (1 - R*loss)**2 + 4*R*loss*np.sin(omega*l)**2
    return out

# Definition of the parameter domain
design_space = [
    {'name': 'R1', 'type': 'continuous', 'domain': (0.1, 0.7)},
    {'name': 'R2', 'type': 'continuous', 'domain': (0.1, 0.7)},
    {'name': 'l', 'type': 'continuous', 'domain': (0.5, 1.0)},
]

# Definition of the fixed environment variable alpha and the
# scan variable omega
environment = [
    {'name': 'alpha', 'type': 'fixed', 'domain': 0.05},
    {'name': 'omega', 'type': 'variable', 'domain': (0, 2*np.pi)},
]
# The omega scan defining the transmission spectra
omegas = np.linspace(0, 2*np.pi, 50)

# Creation of the study object with study_id 'active_surrogate_training'
study = client.create_study(
    design_space=design_space,
    environment=environment,
    driver="ActiveLearning",
    name="Active learning of a global surrogate model",
    study_id="active_surrogate_training"
)

study.configure(
    max_iter=50,
    surrogates=[
        # We use a neural network with 4 hidden layers of 200 neurons each
        # to learn the scalar function Atrans(R1, R2, l, omega).
        dict(
            type="NN", name="Atrans", output_dim=1,
            hidden_layers_arch=[200, 200, 200, 200],
            num_NNs=60,
            optimization_step_max=-1,
            trainer=dict(
                type="full_data_trainer",
                num_epochs=1000,
                num_expel_NNs=30
            )
        )
    ],
    variables=[
        # This variable defines a scan of the surrogate prediction over all
        # omega values.
        dict(
            type="Scan",
            name="omega_scan",
            input_surrogate="Atrans",
            output_dim=len(omegas),
            scan_parameters=["omega"],
            scan_values=omegas[:, None].tolist(),
        ),
        # This variable defines the average transmission of the omega scan.
        dict(type="LinearCombination", name="average", inputs=["omega_scan"]),
    ],
    objectives=[
        # The objective is to evaluate the model function at the point of
        # maximal uncertainty of the average transmission.
        dict(
            type="Explorer",
            name="objective",
            variable="average",
        )
    ]
)

# Evaluation of the black-box function for specified design parameters
def evaluate(study: Study, R1: float, R2: float, l: float, alpha: float) -> Observation:
    time.sleep(2) # emulate an expensive black-box evaluation
    observation = study.new_observation()
    for omega in omegas:
        observation.add(
            Atrans(R1, R2, l, alpha, omega),
            environment_value=[omega],
            model_name="Atrans",
        )
    return observation

# Run the training loop
study.set_evaluator(evaluate)
study.run()

study.configure(
    surrogates=[
        # To make more accurate predictions, we retrain the network on
        # all data for 1500 epochs.
        dict(
            type="NN", name="Atrans", output_dim=1,
            hidden_layers_arch=[200, 200, 200, 200],
            num_NNs=60,
            trainer=dict(
                type="full_data_trainer",
                num_epochs=1500,
                num_expel_NNs=30
            )
        ),
    ],
)

# Get predictions and analytic values on a more finely resolved omega scan
omegas_fine = np.linspace(0, 2*np.pi, 150)

# To test the worst-case prediction, we get a suggestion corresponding to
# the sample with the largest uncertainty
s = study.get_suggestion()
study.clear_suggestion(s.id)

plt.figure(figsize=(10, 5))
for R1, R2, l in [
    (s.kwargs["R1"], s.kwargs["R2"], s.kwargs["l"]),
    (0.1, 0.1, 0.5),
    (0.1, 0.7, 0.75),
    (0.7, 0.7, 1.0),
]:
    prediction = study.driver.predict(
        points=[[R1, R2, l, omega] for omega in omegas_fine],
        object_type="surrogate",
        name="Atrans",
    )
    mean = np.array(prediction["mean"]).squeeze()
    std = np.sqrt(np.array(prediction["variance"])).squeeze()
    p = plt.plot(omegas_fine, mean)
    plt.fill_between(
        omegas_fine, mean - std, mean + std, alpha=0.2, color=p[0].get_color(),
    )
    plt.plot(
        omegas_fine,
        [Atrans(R1, R2, l, 0.05, omega) for omega in omegas_fine],
        "--",
        color=p[0].get_color(),
    )

plt.xlabel("Angular frequency")
plt.ylabel("Transmission")
plt.grid()
plt.savefig("etalon_predictions.svg", transparent=True)
Figure: Prediction of etalon transmission

The figure shows, for different parameters \(R_1, R_2\), and \(l\), the predicted transmission function (solid lines; shading indicates the prediction uncertainty) in comparison to the analytical transmission values (dashed lines). The blue line corresponds to the prediction with the largest average uncertainty. The other lines correspond to the etalon parameters \(R_1 = 0.1, R_2 = 0.1, l = 0.5\) (orange), \(R_1 = 0.1, R_2 = 0.7, l = 0.75\) (green), and \(R_1 = 0.7, R_2 = 0.7, l = 1.0\) (red). Considering that only 50 design points (training spectra) were sampled, the agreement between prediction and analytical values is very good.