Regression Intuition#
[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
import seaborn as sns
from mpl_toolkits import mplot3d
[13]:
X, y = make_regression(n_features=1,noise=30,random_state=42,bias=100)
[18]:
df = pd.DataFrame(np.hstack((X,y.reshape(-1,1))),columns=['x1','y'])
[19]:
df['x0'] = 1
[20]:
df.plot(x='x1',y='y',kind='scatter')
[20]:
<AxesSubplot:xlabel='x1', ylabel='y'>

[21]:
def plot_regression(x,y,y_hat,figsize=(12,5)):
    fig, ax = plt.subplots(1,2,figsize=figsize)
    ax[0].scatter(x, y, label='original')
    ax[0].plot(x, y_hat, 'k.', label='predicted')
    ax[1].plot(y, label='original')
    ax[1].plot(y_hat, label='predicted')
    plt.legend()
Fitting a linear regression model#
revisiting the pseudo-inverse#
\begin{align}
X\theta &= Y \\
\theta &= X^{+}Y
\end{align}

where $X^{+}$ is the Moore-Penrose pseudo-inverse (computed by np.linalg.pinv; $X$ is not square, so it has no true inverse).
[22]:
theta = np.linalg.pinv(df[['x0','x1']].values) @ df.y.values
print("theta :",theta)
y_hat = df[['x0','x1']].values @ theta
plot_regression(df.x1,df.y, y_hat)
theta : [103.49534596 49.82930935]

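A quick sanity check, not in the original run (just a sketch): the pseudo-inverse solution is exactly the least-squares solution, so it should agree with np.linalg.lstsq.

[ ]:
# sketch: pinv-based theta should match the least-squares solver
A = df[['x0','x1']].values
theta_lstsq, *_ = np.linalg.lstsq(A, df.y.values, rcond=None)
print(np.allclose(np.linalg.pinv(A) @ df.y.values, theta_lstsq))  # expected: True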
revisiting svd and linear systems#
[23]:
u,s,vT = np.linalg.svd(df[['x0','x1']].values,full_matrices=False)
theta = vT.T @ np.linalg.pinv(np.diag(s)) @ u.T @ df.y
print("theta :",theta)
y_hat = df[['x0','x1']].values @ theta
plot_regression(df.x1,df.y, y_hat)
theta : [103.49534596 49.82930935]

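The numbers match the previous cell because np.linalg.pinv is itself computed from the SVD; a minimal check of the identity X^+ = V diag(1/s) U^T (valid here since no singular value is zero):

[ ]:
# sketch: pinv(A) equals V @ diag(1/s) @ U.T when all singular values are non-zero
A = df[['x0','x1']].values
print(np.allclose(np.linalg.pinv(A), vT.T @ np.diag(1/s) @ u.T))  # expected: True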
good old sklearn#
[24]:
from sklearn.linear_model import LinearRegression
[25]:
model = LinearRegression()
model = model.fit(df[['x0','x1']].values,df.y.values)
[26]:
y_hat = model.predict(df[['x0','x1']].values)
[27]:
plot_regression(df.x1,df.y, y_hat)

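The fitted parameters should line up with the pseudo-inverse solution. One caveat (assuming the default fit_intercept=True): sklearn fits the intercept separately, so the constant x0 column should end up with a coefficient near zero while intercept_ plays the role of theta[0].

[ ]:
# hedged check: intercept_ ~ theta[0], coef_ ~ [0, theta[1]]
print("intercept:", model.intercept_)
print("coef     :", model.coef_)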
let's try something with neural networks#
[28]:
import tensorflow as tf
A perceptron-like neural net#
[29]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
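A peek at what adapt learned (a sketch): the layer stores per-feature mean and variance and maps x to (x - mean) / sqrt(variance) at call time.

[ ]:
# per-feature statistics learned by adapt; x0 is constant, so its variance is ~0
print(normalizer.mean.numpy())
print(normalizer.variance.numpy())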
[30]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=1)
])
[31]:
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization (Normalization)   (None, 2)           5
dense (Dense)                   (None, 1)           3
=================================================================
Total params: 8
Trainable params: 3
Non-trainable params: 5
_________________________________________________________________
[32]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[32]:

[33]:
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)

It is not trained yet, so the result is understandable.
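Before fit, the Dense layer only holds its initial parameters (by default in Keras: a Glorot-uniform kernel and a zero bias), so the prediction above is essentially a random line. A peek:

[ ]:
# untrained parameters: small random kernel, zero bias
w, b = model.layers[-1].get_weights()
print(w.ravel(), b)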
[34]:
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mse',
    metrics=['mse']
)
[35]:
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=1000,
    batch_size=32,
    verbose=0,
    validation_split=0.2)
[36]:
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
[38]:
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
[38]:
<AxesSubplot:xlabel='epochs'>


[39]:
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)

A slightly deeper neural net, but no activation functions#
[40]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization (Normalization)   (None, 2)           5
dense_1 (Dense)                 (None, 5)           15
dense_2 (Dense)                 (None, 5)           30
dense_3 (Dense)                 (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



So I didn’t introduce any activation / non-linearity, and no matter how deep the network is, it is still just a linear regression model. Ha Ha Ha
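A minimal sketch of why: composing affine maps yields another affine map, so the stacked Dense layers collapse into a single (W, b) whose predictions match the network's.

[ ]:
# collapse the linear stack into one affine map and compare with model.predict
W, b = np.eye(2), np.zeros(2)
for layer in model.layers[1:]:   # skip the normalizer
    Wl, bl = layer.get_weights()
    W, b = W @ Wl, b @ Wl + bl
x_norm = normalizer(df[['x0','x1']].values).numpy()
print(np.allclose(x_norm @ W + b, model.predict(df[['x0','x1']].values), atol=1e-3))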
[41]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[41]:

now a neural net with sigmoid applied#
[42]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_2"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization (Normalization)   (None, 2)           5
dense_4 (Dense)                 (None, 5)           15
dense_5 (Dense)                 (None, 5)           30
dense_6 (Dense)                 (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



The fit is a little bit curved now, thanks to the sigmoid, as it tries to match the pattern.
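For reference, the activation responsible for that curvature (a quick plot, not in the original run): sigmoid(z) = 1 / (1 + e^{-z}) squashes its input into (0, 1).

[ ]:
# the S-shape the network bends its fit with
z = np.linspace(-10, 10, 200)
plt.plot(z, 1/(1 + np.exp(-z)))
plt.title('sigmoid')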
[43]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[43]:

2 sigmoids applied in the net#
[44]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=5,activation='sigmoid'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=500,
    batch_size=32,
    verbose=0,
    validation_split=0.2)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_3"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization (Normalization)   (None, 2)           5
dense_7 (Dense)                 (None, 5)           15
dense_8 (Dense)                 (None, 5)           30
dense_9 (Dense)                 (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



More curvature / more non-linear pattern matching as the number of sigmoid layers increases.
[45]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[45]:

let's try a slightly more complex pattern#
[48]:
X, y = make_regression(n_features=1,noise=20,random_state=42,bias=100,n_samples=500)
df = pd.DataFrame()
df['x1'] = X[...,-1]**3   # cube the feature: y stays linear in X, so y varies like the cube root of x1
df['y'] = y
df['x0'] = 1
df.head()
[48]:
         x1            y  x0
0 -0.528099    57.401862   1
1  0.000913   102.950676   1
2  0.105983   123.553604   1
3 -3.232089    -9.967066   1
4 -0.057206    77.788884   1
[49]:
df.plot(x='x1',y='y',kind='scatter')
[49]:
<AxesSubplot:xlabel='x1', ylabel='y'>

a completely linear model for complex data#
[50]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
[51]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2
)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_4"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_1 (Normalization) (None, 2)           5
dense_10 (Dense)                (None, 5)           15
dense_11 (Dense)                (None, 5)           30
dense_12 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



As expected, no matter how deep it is, it can only fit a linear pattern.
[52]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[52]:

with a sigmoid introducing non-linearity#
[53]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2
)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_5"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_1 (Normalization) (None, 2)           5
dense_13 (Dense)                (None, 5)           15
dense_14 (Dense)                (None, 5)           30
dense_15 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



[54]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[54]:

with a relu layer#
[57]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=5),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=200,
    batch_size=32,
    verbose=0,
    validation_split=0.2
)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_8"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_1 (Normalization) (None, 2)           5
dense_22 (Dense)                (None, 5)           15
dense_23 (Dense)                (None, 5)           30
dense_24 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



[58]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[58]:

with two relu layers#
[59]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=5, activation='relu'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(
    df[['x0','x1']],
    df.y,
    epochs=100,
    batch_size=32,
    verbose=0,
    validation_split=0.2
)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_9"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_1 (Normalization) (None, 2)           5
dense_25 (Dense)                (None, 5)           15
dense_26 (Dense)                (None, 5)           30
dense_27 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



[60]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[60]:

sine wave with a neural network#
[61]:
X, y = make_regression(n_features=1,noise=10,random_state=42,n_samples=500)
X.shape, y.shape
[61]:
((500, 1), (500,))
[62]:
df = pd.DataFrame()
df['x1'] = y                     # reuse the regression target as the input feature
df['y'] = np.sin(X[...,-1]*4)    # new target: a sine of the original feature
df['x0'] = 1
df.plot(x='x1',y='y',kind='scatter')
[62]:
<AxesSubplot:xlabel='x1', ylabel='y'>

Trying out a linear model#
I know this is not gonna work.
[63]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='linear'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(df[['x0','x1']], df.y, epochs=500,
                    batch_size=32, verbose=0, validation_split=0.2)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_10"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_2 (Normalization) (None, 2)           5
dense_28 (Dense)                (None, 5)           15
dense_29 (Dense)                (None, 1)           6
=================================================================
Total params: 26
Trainable params: 21
Non-trainable params: 5
_________________________________________________________________



Pretty obvious.
[64]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[64]:

Now with 2 sigmoid layers#
[65]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(df[['x0','x1']], df.y, epochs=500,
                    batch_size=32, verbose=0, validation_split=0.3)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_11"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_3 (Normalization) (None, 2)           5
dense_30 (Dense)                (None, 5)           15
dense_31 (Dense)                (None, 5)           30
dense_32 (Dense)                (None, 1)           6
=================================================================
Total params: 56
Trainable params: 51
Non-trainable params: 5
_________________________________________________________________



[66]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[66]:

6-layer neural network with sigmoid and tanh#
[69]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(df[['x0','x1']].values) # adapt is like fit
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='sigmoid'),
    tf.keras.layers.Dense(units=100, activation='tanh'),
    tf.keras.layers.Dense(units=100, activation='tanh'),
    tf.keras.layers.Dense(units=1)
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mse']
)
history = model.fit(df[['x0','x1']], df.y, epochs=1000,
                    batch_size=32, verbose=0, validation_split=0.3)
history_metrics = pd.DataFrame(history.history)
history_metrics['epochs'] = history.epoch
history_metrics.plot(x='epochs',y=['loss','val_loss'])
history_metrics.plot(x='epochs',y=['mse','val_mse'])
y_hat = model.predict(df[['x0','x1']].values)
plot_regression(df.x1,df.y,y_hat)
Model: "sequential_13"
_________________________________________________________________
Layer (type)                    Output Shape        Param #
=================================================================
normalization_5 (Normalization) (None, 2)           5
dense_39 (Dense)                (None, 100)         300
dense_40 (Dense)                (None, 100)         10100
dense_41 (Dense)                (None, 100)         10100
dense_42 (Dense)                (None, 100)         10100
dense_43 (Dense)                (None, 100)         10100
dense_44 (Dense)                (None, 1)           101
=================================================================
Total params: 40,806
Trainable params: 40,801
Non-trainable params: 5
_________________________________________________________________



[71]:
tf.keras.utils.plot_model(model,show_layer_activations=True)
[71]:
