Part 1: Regularized Linear Regression and Bias/Variance Exercise
Regularized Linear Regression
1.1 Data Visualization
```python
import numpy as np
from scipy.io import loadmat

# Load the MATLAB data file and flatten each array to 1-D
data = loadmat('ex5data1.mat')
X, y, Xval, yval, Xtest, ytest = map(np.ravel, [data['X'], data['y'], data['Xval'], data['yval'], data['Xtest'], data['ytest']])
```
```python
import matplotlib.pyplot as plt

plt.scatter(X, y, marker='x', c='r')
plt.show()
```
1.2 Regularized Linear Regression Cost Function
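The code below implements the regularized cost from the exercise handout, restated here for reference:

\[
J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)^2 + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2
\]

Note that the bias term \(\theta_0\) is not included in the penalty.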
```python
X = X.reshape(-1, 1)
X = np.insert(X, 0, 1, axis=1)  # prepend the intercept column of ones
theta = np.ones(X.shape[1])
```
```python
def reg_cost(theta, X, y, lamb):
    m = X.shape[0]
    t = X @ theta - y                              # residuals
    cost = (t.T @ t) / (2 * m)                     # unregularized squared-error cost
    reg = lamb / (2 * m) * np.sum(theta[1:] ** 2)  # penalty skips theta[0]
    return cost + reg
```
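As a quick sanity check, the original ex5 handout reports a cost of roughly 303.993 for \(\theta = [1, 1]\) and \(\lambda = 1\) on this dataset (treat that value as an assumption if your data differs):

```python
# Expected to be about 303.993 per the original ex5 handout
reg_cost(np.ones(X.shape[1]), X, y, 1)
```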
1.3 Regularized Linear Regression Gradient
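The corresponding gradient, again with no penalty on \(\theta_0\):

\[
\frac{\partial J}{\partial \theta_0} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)x_0^{(i)},
\qquad
\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)x_j^{(i)} + \frac{\lambda}{m}\theta_j \quad (j \ge 1)
\]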
```python
def reg_gradient(theta, X, y, lamb):
    m = len(X)
    grad = (X.T @ (X @ theta - y)) / m  # vectorized gradient of the squared-error term
    reg = lamb / m * theta
    reg[0] = 0                          # the bias term is not regularized
    return grad + reg
```
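A minimal numerical gradient check can confirm the analytic gradient (my own addition, not part of the exercise; `numeric_grad` is a hypothetical helper):

```python
def numeric_grad(f, theta, eps=1e-4):
    # central finite differences, one coordinate at a time
    g = np.zeros_like(theta, dtype=float)
    for j in range(len(theta)):
        e = np.zeros_like(theta, dtype=float)
        e[j] = eps
        g[j] = (f(theta + e) - f(theta - e)) / (2 * eps)
    return g

t0 = np.ones(X.shape[1])
numeric_grad(lambda t: reg_cost(t, X, y, 1), t0)  # should closely match reg_gradient(t0, X, y, 1)
```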
1.4 Fitting Linear Regression
```python
import scipy.optimize as opt

res = opt.minimize(fun=reg_cost, x0=theta, args=(X, y, 0),
                   method='TNC', jac=reg_gradient)
res
```
```
     fun: 22.373906495108926
     jac: array([ 1.52549682e-07, -7.76071267e-09])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 9
     nit: 4
  status: 1
 success: True
       x: array([13.08790367,  0.36777923])
```
```python
plt.scatter(data['X'], y)
temp_X = np.arange(-50, 40)
plt.plot(temp_X, res.x[0] + temp_X * res.x[1])  # fitted regression line
plt.show()
```
Bias and Variance
2.1 Learning Curves
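A learning curve plots training and cross-validation error against the training-set size. For each \(i\), a model \(\theta^{(i)}\) is fit on the first \(i\) training examples, and both errors are then evaluated without the regularization term:

\[
J_{\text{train}}(i) = J\big(\theta^{(i)};\; X_{1:i},\, y_{1:i}\big), \qquad
J_{\text{cv}}(i) = J\big(\theta^{(i)};\; X_{\text{val}},\, y_{\text{val}}\big)
\]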
```python
m = X.shape[0]
Xval = Xval.reshape(-1, 1)
Xval = np.insert(Xval, 0, 1, axis=1)
```
```python
train_cost = []
cv_cost = []
for i in range(1, m + 1):
    # train on the first i examples with lambda = 1 ...
    res = opt.minimize(fun=reg_cost, x0=np.ones(X.shape[1]),
                       args=(X[:i], y[:i], 1), method='TNC', jac=reg_gradient)
    # ... but report both errors without the regularization term (lambda = 0)
    train_cost.append(reg_cost(res.x, X[:i], y[:i], 0))
    cv_cost.append(reg_cost(res.x, Xval, yval, 0))
```
```python
plt.plot(np.arange(1, m + 1), cv_cost, label='cv cost')
plt.plot(np.arange(1, m + 1), train_cost, label='train cost')
plt.legend()
plt.show()
```
Both errors level off at a high value, so the model fits the data poorly: it is underfitting.
Polynomial Regression
3.1 Polynomial Features
```python
# Reload the raw 1-D data (undoing the reshaping done earlier)
X, y, Xval, yval, Xtest, ytest = map(np.ravel, [data['X'], data['y'], data['Xval'], data['yval'], data['Xtest'], data['ytest']])
```
```python
import pandas as pd

def poly_features(X, power):
    # columns f1..f{power} hold X, X^2, ..., X^power
    data = {f'f{i}': np.power(X, i) for i in range(1, power + 1)}
    return pd.DataFrame(data)
```
```python
poly_features(X, 3).head()
```
3.2 Building the Polynomial Dataset
```python
def normalize_feature(df):
    # z-score each column: (x - mean) / std
    return df.apply(lambda c: (c - c.mean()) / c.std())
```
```python
def prepare_poly_data(*args, power):
    def prepare(X):
        df = poly_features(X, power)
        t = normalize_feature(df).values
        return np.insert(t, 0, 1, axis=1)  # add the intercept column
    return [prepare(x) for x in args]
```
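One design note: `prepare_poly_data` standardizes each dataset with its own mean and standard deviation. A common alternative, not used in this exercise, is to reuse the training set's statistics for the validation and test sets; a minimal sketch (the helper name `prepare_poly_data_shared_stats` is my own):

```python
def prepare_poly_data_shared_stats(X_train, *others, power):
    df_train = poly_features(X_train, power)
    mu, sigma = df_train.mean(), df_train.std()  # training-set statistics only
    def prepare(X):
        df = (poly_features(X, power) - mu) / sigma
        return np.insert(df.values, 0, 1, axis=1)
    return [prepare(x) for x in (X_train, *others)]
```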
```python
X_poly, Xval_poly, Xtest_poly = prepare_poly_data(X, Xval, Xtest, power=8)
X_poly[:3, :]
```
3.3 Polynomial Regression
```python
def plot_learning_curve(X, y, Xval, yval, lamb):
    train_cost, cv_cost = [], []
    m = X.shape[0]
    for i in range(1, m + 1):
        res = opt.minimize(fun=reg_cost, x0=np.ones(X.shape[1]),
                           args=(X[:i], y[:i], lamb), method='TNC', jac=reg_gradient)
        # note: here both reported costs include the regularization term
        train_cost.append(reg_cost(res.x, X[:i], y[:i], lamb))
        cv_cost.append(reg_cost(res.x, Xval, yval, lamb))
    plt.plot(np.arange(1, m + 1), cv_cost, label='cv cost')
    plt.plot(np.arange(1, m + 1), train_cost, label='train cost')
    plt.legend()
    plt.show()
```
```python
plot_learning_curve(X_poly, y, Xval_poly, yval, 0)
```
The training error stays essentially at zero throughout, while the validation error decreases only gradually, leaving a large gap between the two curves: a sign of overfitting.
3.4 Tuning the Regularization Parameter
With \(\lambda=1\):
```python
plot_learning_curve(X_poly, y, Xval_poly, yval, 1)
```
The training error is still small, but no longer identically zero, so the overfitting has been reduced.
With \(\lambda=100\):
```python
plot_learning_curve(X_poly, y, Xval_poly, yval, 100)
```
The regularization is now too strong: the model underfits.
Finding the Best \(\lambda\)
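The selection rule is to fit one model per candidate value and pick the \(\lambda\) whose model has the lowest cross-validation cost:

\[
\lambda^{*} = \arg\min_{\lambda}\; J_{\text{cv}}\big(\theta_{\lambda}\big)
\]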
```python
# Candidates: 0 plus a geometric grid 0.01, 0.02, ..., 10.24
lamb_candidate = [(2 ** t) * 0.01 for t in range(11)]
lamb_candidate.insert(0, 0)
train_cost, cv_cost = [], []
for lamb in lamb_candidate:
    res = opt.minimize(fun=reg_cost, x0=np.ones(X_poly.shape[1]),
                       args=(X_poly, y, lamb), method='TNC', jac=reg_gradient)
    train = reg_cost(res.x, X_poly, y, lamb)
    cv = reg_cost(res.x, Xval_poly, yval, lamb)
    train_cost.append(train)
    cv_cost.append(cv)
```
```python
plt.plot(lamb_candidate, train_cost, label='train')
plt.plot(lamb_candidate, cv_cost, label='cv')
plt.legend()
plt.xlabel('lambda')
plt.ylabel('cost')
plt.show()
```
```python
lamb_candidate[np.argmin(cv_cost)]
```
```
0.32
```
```python
for lamb in lamb_candidate:
    res = opt.minimize(fun=reg_cost, x0=np.ones(X_poly.shape[1]),
                       args=(X_poly, y, lamb), method='TNC', jac=reg_gradient)
    print(f'test cost(lamb={lamb})={reg_cost(res.x, Xtest_poly, ytest, lamb)}')
```
```
test cost(lamb=0)=9.982275423899827
test cost(lamb=0.01)=10.973543277025396
test cost(lamb=0.02)=10.548120143946353
test cost(lamb=0.04)=10.007174745468829
test cost(lamb=0.08)=9.392941014843759
test cost(lamb=0.16)=8.820134306044448
test cost(lamb=0.32)=8.587713795685527
test cost(lamb=0.64)=9.228251765308313
test cost(lamb=1.28)=11.470274383762167
test cost(lamb=2.56)=16.3078760577592
test cost(lamb=5.12)=24.97737418682823
test cost(lamb=10.24)=38.10335942580261
```
This confirms that the best \(\lambda\) is 0.32, which also achieves the lowest test cost among the candidates.
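One caveat worth noting (my own observation, not part of the original write-up): the costs above include the regularization penalty. The usual convention is to report held-out error with \(\lambda = 0\) in the cost even when the model was trained with regularization; a minimal sketch:

```python
# Refit with the selected lambda, then evaluate the test cost without the penalty term
res = opt.minimize(fun=reg_cost, x0=np.ones(X_poly.shape[1]),
                   args=(X_poly, y, 0.32), method='TNC', jac=reg_gradient)
reg_cost(res.x, Xtest_poly, ytest, 0)
```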