Regression Intuition#
[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
import seaborn as sns
from mpl_toolkits import mplot3d
[13]:
X, y = make_regression(n_features=1,noise=30,random_state=42,bias=100)
[18]:
df = pd.DataFrame(np.hstack((X,y.reshape(-1,1))),columns=['x1','y'])
[19]:
df['x0'] = 1
[20]:
df.plot(x='x1',y='y',kind='scatter')
[20]:
<AxesSubplot:xlabel='x1', ylabel='y'>

[21]:
def plot_regression(x,y,y_hat,figsize=(12,5)):
    fig, ax = plt.subplots(1,2,figsize=figsize)
    ax[0].scatter(x, y, label='original')
    ax[0].plot(x, y_hat, 'k.', label='predicted')
    ax[1].plot(y, label='original')
    ax[1].plot(y_hat, label='predicted')
    plt.legend()
Fitting a linear regression model#
revisiting the pseudo-inverse#
\begin{align}
X\theta &= Y \\
\theta &= X^{+}Y
\end{align}

where $X^{+}$ is the Moore-Penrose pseudo-inverse (computed by np.linalg.pinv; $X$ is not square, so it has no true inverse).
[22]:
theta = np.linalg.pinv(df[['x0','x1']].values) @ df.y.values
print("theta :",theta)
y_hat = df[['x0','x1']].values @ theta
plot_regression(df.x1,df.y, y_hat)
theta : [103.49534596 49.82930935]

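A quick sanity check, not in the original run (just a sketch): the pseudo-inverse solution is exactly the least-squares solution, so it should agree with np.linalg.lstsq.

[ ]:
# sketch: pinv-based theta should match the least-squares solver
A = df[['x0','x1']].values
theta_lstsq, *_ = np.linalg.lstsq(A, df.y.values, rcond=None)
print(np.allclose(np.linalg.pinv(A) @ df.y.values, theta_lstsq))  # expected: True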
revisiting svd and linear systems#
[23]:
u,s,vT = np.linalg.svd(df[['x0','x1']].values,full_matrices=False)
theta = vT.T @ np.linalg.pinv(np.diag(s)) @ u.T @ df.y
print("theta :",theta)
y_hat = df[['x0','x1']].values @ theta
plot_regression(df.x1,df.y, y_hat)
theta : [103.49534596 49.82930935]

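The numbers match the previous cell because np.linalg.pinv is itself computed from the SVD; a minimal check of the identity X^+ = V diag(1/s) U^T (valid here since no singular value is zero):

[ ]:
# sketch: pinv(A) equals V @ diag(1/s) @ U.T when all singular values are non-zero
A = df[['x0','x1']].values
print(np.allclose(np.linalg.pinv(A), vT.T @ np.diag(1/s) @ u.T))  # expected: True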
good old sklearn#
[24]:
from sklearn.linear_model import LinearRegression
[25]:
model = LinearRegression()
model = model.fit(df[['x0','x1']].values,df.y.values)
[26]:
y_hat = model.predict(df[['x0','x1']].values)
[27]:
plot_regression(df.x1,df.y, y_hat)

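The fitted parameters should line up with the pseudo-inverse solution. One caveat (assuming the default fit_intercept=True): sklearn fits the intercept separately, so the constant x0 column should end up with a coefficient near zero while intercept_ plays the role of theta[0].

[ ]:
# hedged check: intercept_ ~ theta[0], coef_ ~ [0, theta[1]]
print("intercept:", model.intercept_)
print("coef     :", model.coef_)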
let's try something with neural networks#
[28]:
import tensorflow as tf
A perceptron-like neural net#
[29]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
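A peek at what adapt learned (a sketch): the layer stores per-feature mean and variance and maps x to (x - mean) / sqrt(variance) at call time.

[ ]:
# per-feature statistics learned by adapt; x0 is constant, so its variance is ~0
print(normalizer.mean.numpy())
print(normalizer.variance.numpy())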
[30]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=1)
])
[31]:
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization (Normalization)   (None, 2)           5
dense (Dense)                   (None, 1)           3
=================================================================
Total params: 8
Trainable params: 3
Non-trainable params: 5
_________________________________________________________________
[32]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[32]:

[33]:
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)

It is not trained yet, so the result is understandable.
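Before fit, the Dense layer only holds its initial parameters (by default in Keras: a Glorot-uniform kernel and a zero bias), so the prediction above is essentially a random line. A peek:

[ ]:
# untrained parameters: small random kernel, zero bias
w, b = model.layers[-1].get_weights()
print(w.ravel(), b)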
[34]:
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mse',
    metrics=['mse']
)
[35]:
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=1000,
    batch_size=32,
    verbose=0,
    validation_split=0.2)
[36]:
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
[38]:
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
[38]:
<AxesSubplot:xlabel='epochs'>


[39]:
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)

A slightly deeper neural net, but no activation functions#
[40]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization (Normalization)   (None, 2)           5
dense_1 (Dense)                 (None, 5)           15
dense_2 (Dense)                 (None, 5)           30
dense_3 (Dense)                 (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



So I didn’t introduce any activation / non-linearity, and no matter how deep the network is, it is still just a linear regression model. Ha Ha Ha
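A minimal sketch of why: composing affine maps yields another affine map, so the stacked Dense layers collapse into a single (W, b) whose predictions match the network's.

[ ]:
# collapse the linear stack into one affine map and compare with model.predict
W, b = np.eye(2), np.zeros(2)
for layer in model.layers[1:]:   # skip the normalizer
    Wl, bl = layer.get_weights()
    W, b = W @ Wl, b @ Wl + bl
x_norm = normalizer(df[['x0','x1']].values).numpy()
print(np.allclose(x_norm @ W + b, model.predict(df[['x0','x1']].values), atol=1e-3))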
[41]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[41]:

now a neural net with sigmoid applied#
[42]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_2"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization (Normalization)   (None, 2)           5
dense_4 (Dense)                 (None, 5)           15
dense_5 (Dense)                 (None, 5)           30
dense_6 (Dense)                 (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



The fit is a little bit curved now, thanks to the sigmoid, as it tries to match the pattern.
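For reference, the activation responsible for that curvature (a quick plot, not in the original run): sigmoid(z) = 1 / (1 + e^{-z}) squashes its input into (0, 1).

[ ]:
# the S-shape the network bends its fit with
z = np.linspace(-10, 10, 200)
plt.plot(z, 1/(1 + np.exp(-z)))
plt.title('sigmoid')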
[43]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[43]:

2 sigmoids applied in the net#
[44]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=500,
    batch_size=32,
    verbose=0,
    validation_split=0.2)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_3"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization (Normalization)   (None, 2)           5
dense_7 (Dense)                 (None, 5)           15
dense_8 (Dense)                 (None, 5)           30
dense_9 (Dense)                 (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



More curvature / more non-linear pattern matching as the number of sigmoid layers increases.
[45]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[45]:

let's try a slightly more complex pattern#
[48]:
X, y = make_regression(n_features=1,noise=20,random_state=42,bias=100,n_samples=500)
df = pd.DataFrame()
df['x1'] = X[...,-1]**3   # cube the feature: y stays linear in X, so y varies like the cube root of x1
df['y'] = y
df['x0'] = 1
df.head()
[48]:
         x1            y  x0
0 -0.528099    57.401862   1
1  0.000913   102.950676   1
2  0.105983   123.553604   1
3 -3.232089    -9.967066   1
4 -0.057206    77.788884   1
[49]:
df.plot(x='x1',y='y',kind='scatter')
[49]:
<AxesSubplot:xlabel='x1', ylabel='y'>

a completely linear model for complex data#
[50]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
[51]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2
)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_4"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_1 (Normalization) (None, 2)           5
dense_10 (Dense)                (None, 5)           15
dense_11 (Dense)                (None, 5)           30
dense_12 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



As expected, no matter how deep it is, it can only fit a linear pattern.
[52]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[52]:

with a sigmoid introducing non-linearity#
[53]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2
)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_5"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_1 (Normalization) (None, 2)           5
dense_13 (Dense)                (None, 5)           15
dense_14 (Dense)                (None, 5)           30
dense_15 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



[54]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[54]:

with a relu layer#
[57]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=200,
    batch_size=32,
    verbose=0,
    validation_split=0.2
)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_8"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_1 (Normalization) (None, 2)           5
dense_22 (Dense)                (None, 5)           15
dense_23 (Dense)                (None, 5)           30
dense_24 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



[58]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[58]:

with two relu layers#
[59]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2
)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_9"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_1 (Normalization) (None, 2)           5
dense_25 (Dense)                (None, 5)           15
dense_26 (Dense)                (None, 5)           30
dense_27 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



[60]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[60]:

sine wave with a neural network#
[61]:
X, y = make_regression(n_features=1,noise=10,random_state=42,n_samples=500)
X.shape, y.shape
[61]:
((500, 1), (500,))
[62]:
df = pd.DataFrame()
df['x1'] = y                     # reuse the regression target as the input feature
df['y'] = np.sin(X[...,-1]*4)    # new target: a sine of the original feature
df['x0'] = 1
df.plot(x='x1',y='y',kind='scatter')
[62]:
<AxesSubplot:xlabel='x1', ylabel='y'>

Trying out a linear model#
I know this is not gonna work.
[63]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='linear'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(df[['x0','x1']], df.y, epochs=500,
                    batch_size=32, verbose=0, validation_split=0.2)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_10"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_2 (Normalization) (None, 2)           5
dense_28 (Dense)                (None, 5)           15
dense_29 (Dense)                (None, 1)           6
=================================================================
Total params: 26
Trainable params: 21
Non-trainable params: 5
_________________________________________________________________



Pretty obvious.
[64]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[64]:

Now with 2 sigmoid layers#
[65]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(df[['x0','x1']], df.y, epochs=500,
                    batch_size=32, verbose=0, validation_split=0.3)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_11"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_3 (Normalization) (None, 2)           5
dense_30 (Dense)                (None, 5)           15
dense_31 (Dense)                (None, 5)           30
dense_32 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



[66]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[66]:

6-layer neural network with sigmoid and tanh#
[69]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='tanh'),
    tf.keras.layers.Dense(units=100, activation='tanh'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(df[['x0','x1']], df.y, epochs=1000,
                    batch_size=32, verbose=0, validation_split=0.3)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_13"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_5 (Normalization) (None, 2)           5
dense_39 (Dense)                (None, 100)         300
dense_40 (Dense)                (None, 100)         10100
dense_41 (Dense)                (None, 100)         10100
dense_42 (Dense)                (None, 100)         10100
dense_43 (Dense)                (None, 100)         10100
dense_44 (Dense)                (None, 1)           101
=================================================================
Total params: 40,806
Trainable params: 40,801
Non-trainable params: 5
_________________________________________________________________



[71]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[71]:
