Fit a feedforward neural network model

import warnings

import chart_studio
from besos import eppy_funcs as ef, sampling
from besos.evaluator import EvaluatorEP, EvaluatorGeneric
from besos.problem import EPProblem
from chart_studio import plotly as py
from plotly import graph_objs as go
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

from parameter_sets import parameter_set

We begin by:

+ getting a predefined list of 7 parameters from parameter_sets.py
+ making these into a problem with electricity use as the objective
+ making an evaluator using the default EnergyPlus building.

parameters = parameter_set(7)
problem = EPProblem(parameters, ["Electricity:Facility"])
building = ef.get_building()
evaluator = EvaluatorEP(problem, building)

Then we draw 20 Latin hypercube samples across this design space and evaluate them.

inputs = sampling.dist_sampler(sampling.lhs, problem, 20)
outputs = evaluator.df_apply(inputs)
inputs
Conductivity Thickness U-Factor Solar Heat Gain Coefficient ElectricEquipment Lights Window to Wall Ratio
0 0.165451 0.150655 1.543389 0.759667 10.846373 14.112927 0.024352
1 0.132154 0.147326 4.990325 0.476966 12.153939 13.535152 0.253947
2 0.080456 0.190067 2.774125 0.715199 10.749949 11.099823 0.929049
3 0.033565 0.287872 0.251303 0.886388 12.717156 14.578163 0.122789
4 0.118116 0.162183 3.689406 0.619588 10.248964 13.919604 0.822831
5 0.104531 0.127132 4.077076 0.528881 12.324394 14.488989 0.670414
6 0.196692 0.139876 0.506217 0.030411 11.374182 11.665950 0.375758
7 0.144890 0.267075 4.432675 0.172185 13.053027 10.926282 0.750233
8 0.085398 0.102225 1.615485 0.298316 10.455626 12.998602 0.455249
9 0.188407 0.252228 2.493945 0.833956 14.747260 12.044832 0.615270
10 0.043554 0.244529 0.894356 0.324217 12.875479 10.282988 0.196747
11 0.176928 0.180441 2.198896 0.669148 14.493702 13.226416 0.343713
12 0.122820 0.110766 2.953618 0.929649 14.949231 10.096406 0.529761
13 0.155939 0.237080 0.744055 0.102866 13.478119 12.425914 0.968424
14 0.066703 0.219910 1.295623 0.554399 11.631811 14.876004 0.588699
15 0.061973 0.179125 4.654101 0.375086 11.081292 10.521093 0.717606
16 0.092996 0.276388 3.934188 0.448308 13.756615 12.745539 0.291697
17 0.154490 0.228798 3.155262 0.209700 14.145472 13.320453 0.867700
18 0.047826 0.295456 1.831015 0.117507 13.649584 11.321448 0.441672
19 0.025519 0.207379 3.353283 0.983222 11.805350 11.781888 0.105139

Train-test split

Next we split the data into a training set (80%) and a testing set (20%).

train_in, test_in, train_out, test_out = train_test_split(
    inputs, outputs, test_size=0.2
)

Normalization of inputs and outputs

To ensure an equal weighting of the inputs in the backpropagation algorithm that fits the neural network, we have to normalize the input values. For example, the window-to-wall ratio lies in the range 0 to 1, while the equipment and lighting power densities in \(W/m^2\) lie in the range 10 to 15. Different options for normalization exist. Here we transform all features (input variables) to have zero mean and a standard deviation of 1, and apply the same scaling to the output. Note that we fit the scalers on the training data only.

# Fit the scalers on the training data only, then transform it.
scaler = StandardScaler()
train_in_scaled = scaler.fit_transform(X=train_in)

scaler_out = StandardScaler()
train_out_scaled = scaler_out.fit_transform(X=train_out)
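
As a quick sanity check (a small addition for illustration, not part of the original notebook), we can confirm that each scaled feature now has zero mean and unit standard deviation:

# Each column of the scaled training inputs should have mean ~0 and std ~1.
print(train_in_scaled.mean(axis=0).round(6))
print(train_in_scaled.std(axis=0).round(6))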

Hyper-parameters

Before we start fitting the NN model, we define the set of hyperparameters we want to analyse in our cross-validation to optimize the model. Here we vary the number of hidden layers of the network as well as the regularization parameter alpha. A larger number of layers and a lower value of the regularizer increase the variance of the network, which may lead to overfitting. The best combination could be found using an optimizer such as Bayesian optimization; in this example we use a simple grid search over the 2 × 3 = 6 combinations below.

hyperparameters = {
    "hidden_layer_sizes": (
        (len(parameters) * 16,),
        (len(parameters) * 16, len(parameters) * 16),
    ),
    "alpha": [1, 10, 10 ** 3],
}

neural_net = MLPRegressor(max_iter=1000, early_stopping=False)
folds = 3

Model fitting

Here we use the NN model from scikit-learn. In a different example we use TensorFlow (with and without the Keras wrapper).

# Grid search over the hyperparameter grid with 3-fold cross-validation.
clf = GridSearchCV(neural_net, hyperparameters, cv=folds)
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=FutureWarning)
    clf.fit(train_in_scaled, train_out_scaled.ravel())


print(f"Best performing model $R^2$ score on training set: {clf.best_score_}")
print(f"Model $R^2$ parameters: {clf.best_params_}")
print(
    f"Best performing model $R^2$ score on a separate test set: {clf.best_estimator_.score(scaler.transform(test_in), scaler_out.transform(test_out))}"
)
Best performing model $R^2$ score on training set: 0.7459884525525834
Best performing model parameters: {'alpha': 1, 'hidden_layer_sizes': (112, 112)}
Best performing model $R^2$ score on a separate test set: 0.9416874254967937
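
GridSearchCV keeps the scores of every combination it tried in its cv_results_ attribute; as an optional diagnostic (not part of the original notebook), we can tabulate the mean cross-validated \(R^2\) per combination:

import pandas as pd

# Mean cross-validated R^2 score for each hyperparameter combination.
cv_results = pd.DataFrame(clf.cv_results_)
print(cv_results[["param_alpha", "param_hidden_layer_sizes", "mean_test_score"]])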

Surrogate Modelling Evaluator object

We can wrap the fitted model in a BESOS Evaluator. This has identical behaviour to the original EnergyPlus Evaluator object.

def evaluation_func(ind, scaler=scaler):
    # Scale the raw inputs, predict with the fitted network, then map the
    # prediction back to the original output units.
    ind = scaler.transform(X=[ind])
    prediction = clf.predict(ind).reshape(-1, 1)
    return (scaler_out.inverse_transform(prediction)[0][0],)


NN_SM = EvaluatorGeneric(evaluation_func, problem)

In the next cells we generate a single input sample and evaluate it using the surrogate model and EnergyPlus.

sample = sampling.dist_sampler(sampling.lhs, problem, 1)
values = sample.values[0]
print(values)
[ 0.11669079  0.17961374  4.25848042  0.13397506 10.48584932 10.17177171
  0.35134885]
NN_SM(values)[0]
1814729300.020491
evaluator(values)[0]
1751712292.4908545
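
The surrogate is within a few percent of the EnergyPlus result here. As a small illustrative check (reusing the sample above, not part of the original notebook), we can compute the relative error directly:

# Relative error of the surrogate prediction vs. the EnergyPlus simulation.
surrogate = NN_SM(values)[0]
simulated = evaluator(values)[0]
print(f"Relative error: {abs(surrogate - simulated) / simulated:.2%}")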

Running a large surrogate evaluation

inputs = sampling.dist_sampler(sampling.lhs, problem, 5000)
outputs = NN_SM.df_apply(inputs)
results = inputs.join(outputs)
results.head()
Conductivity Thickness U-Factor Solar Heat Gain Coefficient ElectricEquipment Lights Window to Wall Ratio Electricity:Facility
0 0.092928 0.238438 4.718053 0.301978 14.540408 13.699712 0.718998 2.198494e+09
1 0.127584 0.228582 0.544851 0.249709 11.915124 13.472015 0.221293 2.061339e+09
2 0.039634 0.158802 3.416324 0.086649 11.756280 10.131401 0.758322 1.823225e+09
3 0.175974 0.152255 4.880733 0.328792 12.571576 10.447726 0.787714 1.922154e+09
4 0.192895 0.221227 4.634923 0.534569 12.375788 14.908257 0.197574 2.200912e+09
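
Because surrogate evaluations are cheap, the 5000 results can be queried immediately. For example (an illustrative query, not part of the original notebook), the design with the lowest predicted electricity use:

# Design with the lowest predicted facility electricity use.
print(results.loc[results["Electricity:Facility"].idxmin()])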

Visualization

chart_studio.tools.set_credentials_file(
    username="besos", api_key="Kb2G2bjOh5gmwh1Midwq"
)
df = inputs.round(3)

# generate a list of dimension dictionaries, one per input column
dimensions = [dict(label=col, values=df[col]) for col in df.columns]

# append the output as the final dimension, rounded to the nearest 100,000
dimensions.append(
    dict(label=outputs.columns[0], values=outputs[outputs.columns[0]].round(-5))
)

data = [
    go.Parcoords(
        line=dict(
            color=outputs["Electricity:Facility"],
            colorscale=[[0, "#D7C16B"], [0.5, "#23D8C3"], [1, "#F3F10F"]],
        ),
        dimensions=dimensions,
    )
]

layout = go.Layout(plot_bgcolor="#E5E5E5", paper_bgcolor="#E5E5E5")

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename="parcoords-basic")
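
py.iplot uploads the figure to Chart Studio using the credentials set above. If no Chart Studio account is available, the figure can instead be rendered locally with plotly's built-in renderer:

# Alternative: render the parallel-coordinates plot locally, without Chart Studio.
fig.show()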