Active learning of a global surrogate model

Driver:: ActiveLearning
Download script:: active_surrogate_training.py

The target of the study is to train a surrogate model of a vectorial function. We consider an active training loop. That is, in each iteration, training data is generated by evaluating the vectorial function at the point of maximal prediction uncertainty.

As an example problem we consider the frequency dependent transmission function of a Fabry-Pérot etalon (angular frequency \(\omega\)). The etalon consists of a resonator of length \(l\) formed by a pair of mirrors with reflectivities \(R_1\) and \(R_2\). The propagation losses inside the resonator are quantified by the intensity-loss coefficient \(\alpha=0.05\).

The transmission function is given as (see also Wikipedia entry on Fabry-Pérot etalon)

\[A_\text{trans}(\omega, R_1, R_2, l) = \frac {(1-R_{1})(1-R_{2})e^{-\alpha l }}{\left({1-{\sqrt {R_{1}R_{2}}}e^{-\alpha l }}\right)^{2}+4{\sqrt {R_{1}R_{2}}}e^{-\alpha l }\sin^{2}(\phi )}.\]

The round-trip phase shift of the light field inside the resonator is \(\phi(\omega, l) = \omega \cdot l\).

We consider the case that we wish to learn the vectorial mapping from etalon parameters \((R_1, R_2, l)\) to transmission spectra

\[\begin{split}\mathbf{f}(R_1, R_2, l) = \begin{bmatrix} A_\text{trans}(\omega_1, R_1, R_2, l) \\ A_\text{trans}(\omega_2, R_1, R_2, l) \\ \vdots \\ A_\text{trans}(\omega_{50}, R_1, R_2, l) \end{bmatrix}\end{split}\]

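with \(\omega_k = 2\pi (k-1)/49\) for \(k = 1, \dots, 50\), i.e. 50 equidistant frequencies between \(0\) and \(2\pi\) including both end points.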

It is possible to learn this vectorial mapping using a multi-output Gaussian process or a multi-output Bayesian neural network. Here, we instead present the approach of learning the four-dimensional scalar function \(A_\text{trans}(\omega, R_1, R_2, l)\) using a single-output Bayesian neural network. This has the advantage that the vector entries are not learned independently, but that correlations between similar frequencies are taken into account. Moreover, after training one can get predictions for arbitrarily fine \(\omega\) scans.

import sys,os
import numpy as np
import time
import matplotlib.pyplot as plt

# Observation is needed for the return annotation of the evaluate() function
from jcmoptimizer import Client, Study, Observation
# Client used below to create the study and, at the end, to shut down the server
client = Client()


def Atrans(
        R1: float, R2: float, l: float, alpha: float, omega: float
) -> np.ndarray:
    """Transmission through the etalon.

    Every argument may be a scalar or a NumPy array; array arguments are
    combined following the usual NumPy broadcasting rules, so a grid of
    etalon parameters can be evaluated on a whole omega-scan in one call.

    Args:
       R1: Reflectivity of first mirror
       R2: Reflectivity of second mirror
       l: resonator length
       alpha: Intensity loss coefficient
       omega: Angular frequency of light

    Returns:
       The transmission value(s). For scalar inputs this is a NumPy float
       scalar, for array inputs an array of the broadcast shape.
    """
    # Attenuation factor exp(-alpha*l) and geometric mean of the reflectivities
    loss = np.exp(-alpha*l)
    R = np.sqrt(R1*R2)

    numerator = (1 - R1)*(1 - R2)*loss
    denominator = (1 - R*loss)**2 + 4*R*loss*np.sin(omega*l)**2
    # Divide out-of-place: an in-place "/=" raises a ValueError whenever
    # omega broadcasts the denominator to a larger shape than the numerator.
    return numerator / denominator

# Parameter domain: the two mirror reflectivities and the resonator length
design_space = [
    dict(name='R1', type='continuous', domain=(0.1, 0.7)),
    dict(name='R2', type='continuous', domain=(0.1, 0.7)),
    dict(name='l', type='continuous', domain=(0.5, 1.0)),
]

# Environment: the fixed loss coefficient alpha and
# the scan variable omega
environment = [
    dict(name='alpha', type='fixed', domain=0.05),
    dict(name='omega', type='variable', domain=(0, 2*np.pi)),
]
# The omega-scan defining the transmission spectra:
# 50 equidistant values from 0 to 2*pi
omegas = np.linspace(0, 2*np.pi, num=50)

# Create the study (study_id 'active_surrogate_training') that uses the
# "ActiveLearning" driver on the design space and environment defined above
study = client.create_study(
    study_id="active_surrogate_training",
    study_name="Active learning of a global surrogate model",
    driver="ActiveLearning",
    design_space=design_space,
    environment=environment,
)

study.configure(
    max_iter=50,
    surrogates=[
        # A neural-network surrogate with 4 hidden layers of 200 neurons each
        # learns the scalar function Atrans(R1, R2, l, omega)
        {
            "type": "NN",
            "name": "Atrans",
            "output_dim": 1,
            "hidden_layers_arch": [200, 200, 200, 200],
            "num_NNs": 60,
            "optimization_step_max": -1,
            "trainer": {
                "type": "full_data_trainer",
                "num_epochs": 1000,
                "num_expel_NNs": 30,
            },
        },
    ],
    variables=[
        # Scan of the surrogate prediction over all omega values;
        # each scan value is passed as a one-element list [omega]
        {
            "type": "Scan",
            "name": "omega_scan",
            "input_surrogate": "Atrans",
            "output_dim": len(omegas),
            "scan_parameters": ["omega"],
            "scan_values": omegas.reshape(-1, 1).tolist(),
        },
        # Average transmission of the omega-scan
        {
            "type": "LinearCombination",
            "name": "average",
            "inputs": ["omega_scan"],
        },
    ],
    objectives=[
        # The model function is evaluated where the uncertainty of
        # the average transmission is maximal.
        {
            "type": "Explorer",
            "name": "objective",
            "variable": "average",
        },
    ],
)

# Evaluation of the black-box function for specified design parameters
def evaluate(study: Study, R1: float, R2: float, l: float, alpha: float) -> Observation:
    """Evaluate the etalon transmission on the complete omega-scan.

    Args:
       study: Study used to create the new observation
       R1: Reflectivity of first mirror
       R2: Reflectivity of second mirror
       l: resonator length
       alpha: Intensity loss coefficient

    Returns:
       Observation to which one "Atrans" value per entry of the
       module-level omega-scan ``omegas`` has been added.
    """
    # Artificial delay that makes the objective expensive
    time.sleep(2)
    result = study.new_observation()
    for frequency in omegas:
        transmission = Atrans(R1, R2, l, alpha, frequency)
        # Each value is stored together with the omega it belongs to
        result.add(
            transmission,
            environment_value=[frequency],
            model_name="Atrans",
        )
    return result

# Register the evaluation function and run the training loop
study.set_evaluator(evaluate)
study.run()


# For making more accurate predictions, the network is trained on
# all data for 1500 epochs.
study.configure(
    surrogates=[
        {
            "type": "NN",
            "name": "Atrans",
            "output_dim": 1,
            "hidden_layers_arch": [200, 200, 200, 200],
            "num_NNs": 60,
            "trainer": {
                "type": "full_data_trainer",
                "num_epochs": 1500,
                "num_expel_NNs": 30,
            },
        },
    ],
)

# Get prediction and analytic values on a finer resolved omega-scan
# (150 points instead of the 50 points of the training scan)
omegas_fine = np.linspace(0, 2 * np.pi, 150)

# To test the worst-case prediction, we get a suggestion corresponding to
# a sample with largest uncertainty
s = study.get_suggestion()
# Only the parameter values s.kwargs are used for the plot below; no
# observation is added for this suggestion, so it is cleared again.
study.clear_suggestion(s.id)

# Compare the surrogate prediction (solid line, shaded +/- one standard
# deviation) with the analytic transmission (dashed line) for the suggested
# sample and three fixed parameter sets.
plt.figure(figsize=(10, 5))
for R1, R2, l in [
    (s.kwargs["R1"], s.kwargs["R2"], s.kwargs["l"]),
    (0.1, 0.1, 0.5),
    (0.1, 0.7, 0.75),
    (0.7, 0.7, 1.0),
]:
    # Surrogate inputs are ordered as (R1, R2, l, omega)
    prediction = study.driver.predict(
        points=[[R1, R2, l, omega] for omega in omegas_fine],
        object_type="surrogate",
        name="Atrans",
    )
    mean = np.array(prediction["mean"]).squeeze()
    std = np.sqrt(np.array(prediction["variance"])).squeeze()
    p = plt.plot(omegas_fine, mean)
    # Reuse the colour of the prediction line for shading and analytic curve
    color = p[0].get_color()
    plt.fill_between(
        omegas_fine, mean - std, mean + std, alpha=0.2, color=color,
    )
    # Analytic reference, evaluated on the whole fine scan in one vectorised
    # call; 0.05 is the fixed environment value of alpha.
    plt.plot(
        omegas_fine,
        Atrans(R1, R2, l, 0.05, omegas_fine),
        "--",
        color=color,
    )

plt.xlabel("Angular frequency")
plt.ylabel("Transmission")
plt.grid()
plt.savefig("etalon_predictions.svg", transparent=True)

client.shutdown_server()

The figure shows for different parameters \(R_1, R_2\) and \(l\) the predicted transmission function (solid lines, shading indicates uncertainty of prediction) in comparison to the analytical transmission value (dashed lines). The blue line corresponds to the prediction with the largest average uncertainty. The other lines correspond to the etalon parameters \(R_1 = 0.1, R_2 = 0.1, l = 0.5\) (orange), \(R_1 = 0.1, R_2 = 0.7, l = 0.75\) (green) and \(R_1 = 0.7, R_2 = 0.7, l = 1.0\) (red). Considering the small number of 50 data points, the agreement between prediction and analytical value is very good.