
Part 1: Regularized Linear Regression and Bias-Variance Exercise

Regularized Linear Regression

1.1 Data Visualization

from scipy.io import loadmat
data = loadmat('ex5data1.mat')
import numpy as np
X, y, Xval, yval, Xtest, ytest = map(np.ravel, [data['X'], data['y'], data['Xval'], data['yval'], data['Xtest'], data['ytest']])
import matplotlib.pyplot as plt
plt.scatter(X, y, marker='x', c='r')
plt.show()

1.2 Regularized Linear Regression Cost Function
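For reference, the cost implemented below is the standard regularized linear regression cost, where the penalty skips the intercept term \(\theta_0\):

\[
J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2 + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2
\]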

X = X.reshape(-1, 1)
X = np.insert(X, 0, 1, axis=1)   # add the intercept column
theta = np.ones(X.shape[1])
def reg_cost(theta, X, y, lamb):
    m = X.shape[0]
    t = X @ theta - y                              # (m,n) @ (n,) -> (m,)
    cost = (t.T @ t) / (2 * m)                     # squared-error term
    reg = lamb / (2 * m) * np.sum(theta[1:] ** 2)  # skip theta_0 in the penalty
    return cost + reg

1.3 Regularized Linear Regression Gradient
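The gradient that goes with this cost (again leaving \(\theta_0\) unregularized) is:

\[
\frac{\partial J}{\partial \theta_0} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x^{(i)}_0,
\qquad
\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x^{(i)}_j + \frac{\lambda}{m}\theta_j \quad (j \ge 1)
\]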

def reg_gradient(theta, X, y, lamb):
    m = len(X)
    grad = (X.T @ (X @ theta - y)) / m
    reg = lamb / m * theta   # same shape as theta
    reg[0] = 0               # do not regularize theta_0
    return grad + reg
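As a quick sanity check (an addition of mine, not in the original write-up): the Coursera ex5 handout reports that with \(\theta = [1, 1]\) and \(\lambda = 1\) the cost should be about 303.993 and the gradient about \([-15.30, 598.25]\); worth verifying against your own copy of the handout.

# sanity check against the values quoted in the ex5 handout (verify independently)
theta_test = np.ones(2)
print(reg_cost(theta_test, X, y, 1))      # should be roughly 303.993
print(reg_gradient(theta_test, X, y, 1))  # should be roughly [-15.30, 598.25]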

1.4 Fitting Linear Regression

import scipy.optimize as opt 
res = opt.minimize(fun=reg_cost, x0=theta, args=(X, y, 0), method='TNC', jac=reg_gradient)
res

     fun: 22.373906495108926
     jac: array([ 1.52549682e-07, -7.76071267e-09])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 9
     nit: 4
  status: 1
 success: True
       x: array([13.08790367,  0.36777923])

plt.scatter(data['X'], y)
temp_X = np.arange(-50,40)
plt.plot(temp_X, res.x[0]+temp_X*res.x[1])
plt.show()

Bias and Variance

2.1 Learning Curves
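A learning curve plots the training error and the cross-validation error as a function of the number of training examples \(i\). Both errors are measured without the regularization term, which is why the costs recorded below are evaluated with \(\lambda = 0\) even though each model is trained with regularization:

\[
J_{train}(\theta) = \frac{1}{2i}\sum_{k=1}^{i}\left(h_\theta(x^{(k)}) - y^{(k)}\right)^2,
\qquad
J_{cv}(\theta) = \frac{1}{2m_{cv}}\sum_{k=1}^{m_{cv}}\left(h_\theta(x_{cv}^{(k)}) - y_{cv}^{(k)}\right)^2
\]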

m = X.shape[0]
Xval = Xval.reshape(-1,1)
Xval = np.insert(Xval, 0, 1, axis=1)
train_cost = []
cv_cost = []
for i in range(1, m+1):
    # train on the first i examples, then record the unregularized errors
    res = opt.minimize(fun=reg_cost, x0=np.ones(X.shape[1]), args=(X[:i], y[:i], 1), method='TNC', jac=reg_gradient)
    train_cost.append(reg_cost(res.x, X[:i], y[:i], 0))
    cv_cost.append(reg_cost(res.x, Xval, yval, 0))
plt.plot(np.arange(1,m+1), cv_cost, label='cv cost')
plt.plot(np.arange(1,m+1), train_cost, label='train cost')
plt.legend()
plt.show()

We can see that this model does not fit the data well; it is underfitting.

Polynomial Regression

3.1 Polynomial Features
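The idea is to map the single input feature \(x\) (the water level) to a vector of its powers, so that a linear model in the new features can fit a non-linear curve:

\[
x \mapsto \left[\, x,\ x^2,\ \dots,\ x^p \,\right]
\]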

# reload the raw (un-augmented) data
X, y, Xval, yval, Xtest, ytest = map(np.ravel, [data['X'], data['y'], data['Xval'], data['yval'], data['Xtest'], data['ytest']])
import pandas as pd
def poly_features(X, power):
    # build columns f1..f{power}: X, X^2, ..., X^power
    data = {f'f{i}': np.power(X, i) for i in range(1, power+1)}
    return pd.DataFrame(data)
poly_features(X, 3).head()  # quick check

3.2 Building the Polynomial Dataset

# z-score normalization of every column
def normalize_feature(df):
    return df.apply(lambda c: (c - c.mean()) / c.std())
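Because the polynomial features live on wildly different scales (compare \(x\) with \(x^8\)), every column is z-scored before training:

\[
x_{norm} = \frac{x - \mu}{\sigma}
\]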
# build the polynomial design matrices
def prepare_poly_data(*args, power):
    def prepare(X):
        df = poly_features(X, power)
        t = normalize_feature(df).values
        return np.insert(t, 0, 1, axis=1)   # add the intercept column
    return [prepare(x) for x in args]
X_poly, Xval_poly, Xtest_poly = prepare_poly_data(X, Xval, Xtest, power=8)
X_poly[:3, :]  # quick check
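One detail worth flagging: prepare_poly_data normalizes each split with its own mean and standard deviation, whereas the original exercise reuses the training set's statistics for the validation and test sets. A minimal sketch of that variant (my own addition; the function name prepare_poly_data_train_stats is hypothetical):

def prepare_poly_data_train_stats(X_train, *others, power):
    # fit the normalization statistics on the training split only
    df_train = poly_features(X_train, power)
    mu, sigma = df_train.mean(), df_train.std()
    def prepare(X):
        df = poly_features(X, power)
        t = ((df - mu) / sigma).values      # reuse the training mean/std
        return np.insert(t, 0, 1, axis=1)   # add the intercept column
    return [prepare(x) for x in (X_train, *others)]

# X_poly2, Xval_poly2, Xtest_poly2 = prepare_poly_data_train_stats(X, Xval, Xtest, power=8)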

3.3 Polynomial Regression

# plot the learning curve for a given lambda
def plot_learning_curve(X, y, Xval, yval, lamb):
    train_cost, cv_cost = [], []
    m = X.shape[0]
    for i in range(1, m+1):
        res = opt.minimize(fun=reg_cost, x0=np.ones(X.shape[1]), args=(X[:i], y[:i], lamb), method='TNC', jac=reg_gradient)
        train_cost.append(reg_cost(res.x, X[:i], y[:i], lamb))
        cv_cost.append(reg_cost(res.x, Xval, yval, lamb))
    plt.plot(np.arange(1, m+1), cv_cost, label='cv cost')
    plt.plot(np.arange(1, m+1), train_cost, label='train cost')
    plt.legend()
    plt.show()
plot_learning_curve(X_poly, y, Xval_poly, yval, 0)

We can see that the training error stays essentially zero throughout, while the cross-validation error only gradually decreases; this is a sign of overfitting.

3.4 Adjusting the Regularization Parameter

When \(\lambda=1\):

plot_learning_curve(X_poly, y, Xval_poly, yval, 1)

The training error is still small, but it no longer stays at zero, so the overfitting has been reduced.

When \(\lambda=100\):

plot_learning_curve(X_poly, y, Xval_poly, yval, 100)

The regularization is too strong here, and the model underfits.

Finding the best \(\lambda\):
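In other words, train one model per candidate \(\lambda\) and keep the one with the lowest cost on the cross-validation set:

\[
\lambda^{*} = \arg\min_{\lambda} J_{cv}\left(\theta^{(\lambda)}\right)
\]

(Note that the loop below evaluates the cross-validation cost with the same \(\lambda\) used for training, while the original exercise evaluates it with the regularization term dropped; the selected \(\lambda\) may shift slightly between the two conventions.)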

lamb_candidate = [(2**t) * 0.01 for t in range(11)]   # 0.01, 0.02, ..., 10.24
lamb_candidate.insert(0, 0)                           # also try lambda = 0
train_cost, cv_cost = [], []
for lamb in lamb_candidate:
    res = opt.minimize(fun=reg_cost, x0=np.ones(X_poly.shape[1]), args=(X_poly, y, lamb), method='TNC', jac=reg_gradient)
    train_cost.append(reg_cost(res.x, X_poly, y, lamb))
    cv_cost.append(reg_cost(res.x, Xval_poly, yval, lamb))
plt.plot(lamb_candidate, train_cost, label='train')
plt.plot(lamb_candidate, cv_cost, label='cv')
plt.legend()
plt.xlabel('lambda')
plt.ylabel('cost')
plt.show()

# lambda with the lowest cross-validation cost
lamb_candidate[np.argmin(cv_cost)]

0.32

# evaluate each candidate lambda on the test set
for lamb in lamb_candidate:
    res = opt.minimize(fun=reg_cost, x0=np.ones(X_poly.shape[1]), args=(X_poly, y, lamb), method='TNC', jac=reg_gradient)
    print(f'test cost(lamb={lamb})={reg_cost(res.x, Xtest_poly, ytest, lamb)}')

test cost(lamb=0)=9.982275423899827
test cost(lamb=0.01)=10.973543277025396
test cost(lamb=0.02)=10.548120143946353
test cost(lamb=0.04)=10.007174745468829
test cost(lamb=0.08)=9.392941014843759
test cost(lamb=0.16)=8.820134306044448
test cost(lamb=0.32)=8.587713795685527
test cost(lamb=0.64)=9.228251765308313
test cost(lamb=1.28)=11.470274383762167
test cost(lamb=2.56)=16.3078760577592
test cost(lamb=5.12)=24.97737418682823
test cost(lamb=10.24)=38.10335942580261

This confirms that the best \(\lambda\) is 0.32.