# Neural networks

In [1]:
import numpy as np
import pandas as pd

from sklearn import preprocessing, metrics

from keras.layers import Dense
from keras.models import Sequential

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Location of the white-wine CSV in the UCI Machine Learning Repository.
# Fetched over HTTPS so the download cannot be silently altered in transit.
WHITES_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'

Read in the Wine Quality dataset.

In [3]:
# The file uses semicolons, not commas, as the field separator.
whites = pd.read_csv(
    filepath_or_buffer=WHITES_URL,
    sep=';',
)

Define a new variable 'good_quality' for whites with quality >= 7.

In [4]:
# Boolean label: True where the quality score is at least 7.
whites['good_quality'] = whites['quality'].ge(7)

Prepare the data.

In [5]:
# Feature matrix: every column except the raw quality score and the label
# derived from it.
# `.values` replaces `.get_values()`, which was deprecated in pandas 0.25 and
# removed in pandas 1.0; `.values` is available on both old and new versions.
X = whites.drop(['quality', 'good_quality'], axis=1).values
# Binary target: the boolean label cast to integers (True -> 1, False -> 0).
y = whites.good_quality.astype('int').values

Standardise `X` so that each feature has zero mean and unit variance.

In [6]:
# Learn the per-feature statistics from X first, then apply them to X.
scaler = preprocessing.StandardScaler().fit(X)
X_scaled = scaler.transform(X)

Initialise neural network.

In [7]:
# Start from an empty Sequential model; layers are appended with nn.add(...)
# in the cells that follow.
nn = Sequential()

Input layer feeding into hidden layer with 5 neurons (sigmoid activation):

In [8]:
# The input width is the number of feature columns in X.
n_features = X.shape[1]
hidden_layer = Dense(units=5, activation='sigmoid', input_dim=n_features)
nn.add(hidden_layer)

Hidden layer feeding into a single output neuron (sigmoid activation):

In [9]:
# A single sigmoid unit, so the network emits one value in (0, 1) per sample.
output_layer = Dense(units=1, activation='sigmoid')
nn.add(output_layer)

Use logistic loss (binary cross-entropy) with the Adam optimiser:

In [10]:
# Configure training: Adam optimiser minimising binary cross-entropy.
nn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
)

Inspect weights before training.

In [11]:
# Display the model's parameter arrays as they stand before nn.fit(...) is
# called.
nn.get_weights()

[array([[-0.39481741, -0.09536666, -0.34531477,  0.43817371, -0.07781047],
        [-0.27063367,  0.17226714,  0.23442703,  0.3716563 , -0.01751077],
        [ 0.60730034, -0.52558172, -0.47341412, -0.05066514, -0.30108669],
        [-0.35363188, -0.60226393, -0.47207981, -0.31543466,  0.24387956],
        [-0.12141952,  0.60060877,  0.22250259, -0.50063074,  0.21751952],
        [ 0.12847966, -0.30485454,  0.2635119 ,  0.24997938,  0.56377   ],
        [-0.4788934 , -0.07476783,  0.58518356,  0.57378525,  0.15224367],
        [ 0.05944139, -0.05845046, -0.59787005,  0.3787601 , -0.47948557],
        [-0.53848195, -0.01631957, -0.506778  ,  0.17272645,  0.18484467],
        [-0.56341803,  0.04864138,  0.58080345,  0.01851743, -0.41355652],
        [-0.16350371, -0.43215483, -0.54295194,  0.27082348, -0.01351953]], dtype=float32),
 array([ 0.,  0.,  0.,  0.,  0.], dtype=float32),
 array([[-0.38505125],
        [ 0.7230866 ],
        [ 0.67454004],
        [-0.46705604],
        [-0.6055

Train the network.

In [12]:
# Fit on the scaled features: 10 epochs, mini-batches of 10 samples, with 20%
# of the data set aside for validation.
nn.fit(
    x=X_scaled,
    y=y,
    epochs=10,
    batch_size=10,
    validation_split=0.2,
)

Train on 3918 samples, validate on 980 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f0cfaee6470>

Inspect weights after training.

In [13]:
# Display the parameter arrays again, now that nn.fit(...) has been run, for
# comparison with the pre-training values.
nn.get_weights()

[array([[-0.46730253, -0.25932771, -0.2482001 ,  0.28815857, -0.05317896],
        [ 0.28671721, -0.03101387,  0.06832395,  0.48858953,  0.314358  ],
        [ 0.24939002, -0.32514063, -0.32678801, -0.11232706, -0.33844876],
        [-0.86731565, -0.18484761,  0.07541508, -0.71438622, -0.23721087],
        [ 0.42230424, -0.13751331, -0.38920203,  0.24901353,  0.96934646],
        [-0.33335617,  0.19384745,  0.59140205, -0.34260234, -0.10526759],
        [-0.24479507, -0.38386905,  0.29488182,  0.71982318,  0.32430506],
        [ 0.45991471, -0.64742208, -1.01590288,  0.95318621,  0.05372089],
        [-0.65589345,  0.32255822, -0.18556926, -0.24852893, -0.13769718],
        [-0.37784705, -0.05710353,  0.38769904,  0.04424203, -0.35806829],
        [-1.05682719,  0.61235577,  0.45069709, -0.87809122, -1.16391683]], dtype=float32),
 array([ 0.39043808, -0.28668246, -0.31193611,  0.27393454,  0.27254069], dtype=float32),
 array([[-1.12472224],
        [ 0.55171263],
        [ 0.56339717],

Use network to predict probabilities.

In [14]:
# Take column 0 of the 2-D prediction output to get a flat 1-D array.
raw_output = nn.predict(X_scaled)
pred_probs = raw_output[:, 0]

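Compute AUC on the full dataset. Note that this includes the samples the network was trained on, so the score is an optimistic estimate of performance on unseen data.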

In [15]:
# Area under the ROC curve of the predicted probabilities against the labels.
metrics.roc_auc_score(y_true=y, y_score=pred_probs)

0.78480094782070053