Part 1 - Multi-class Classification and Neural Networks Exercise
1 Multi-class Classification
1.1 Loading the Data
```python
import pandas as pd
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt  # used below for plotting the digit images
```
```python
data = loadmat('ex3data1.mat')
data
```
{'X': array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
'__globals__': [],
'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
'__version__': '1.0',
'y': array([[10],
[10],
[10],
...,
[ 9],
[ 9],
[ 9]], dtype=uint8)}
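The file holds 5000 training examples: each row of `X` is a flattened 20×20 grayscale image, and `y` holds the labels 1–10, with 10 standing for the digit 0. A quick shape check (not part of the original notebook):

```python
# Confirm the expected dimensions of the training set.
print(data['X'].shape)  # (5000, 400): 5000 images, 20x20 = 400 pixels each
print(data['y'].shape)  # (5000, 1): labels 1..10, where 10 represents the digit 0
```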
1.2 Visualizing the Data
```python
X = data['X']
y = data['y']
i = 1616
image = X[i, :]
plt.matshow(image.reshape((20, 20)), cmap='gray_r')
plt.show()
y.flatten()[i]
```
(output: the 20×20 grayscale image of training example 1616, followed by its label)
1.3 Vectorized Logistic Regression
1.3.1 Cost Function
```python
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
```
```python
def cost(theta, X, y, lr):
    # lr is accepted only so the signature matches gradient's; this cost is unregularized
    a = -y.T @ np.log(sigmoid(X @ theta.T))
    b = (1 - y).T @ np.log(1 - sigmoid(X @ theta.T))
    return float((a - b) / X.shape[0])
```
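Written out, with m training examples and g denoting the sigmoid, this computes the standard (unregularized) cross-entropy cost in vector form:

$$
J(\theta) = \frac{1}{m}\left[-y^{T}\log\big(g(X\theta)\big) - (1-y)^{T}\log\big(1-g(X\theta)\big)\right]
$$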
1.3.2 Gradient Computation
```python
def gradient(theta, X, y, lr):
    theta = theta.reshape((1, X.shape[1]))
    return (X.T @ (sigmoid(X @ theta.T) - y)).T / X.shape[0]
```
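The corresponding vectorized gradient, again without a regularization term, is

$$
\nabla_{\theta} J(\theta) = \frac{1}{m}\,X^{T}\big(g(X\theta) - y\big)
$$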
1.3.3 Fitting the Parameters
```python
from scipy.optimize import minimize

def one_vs_all(X, y, num_labels, lr):
    X = np.insert(X, 0, 1, axis=1)
    all_theta = np.zeros((num_labels, X.shape[1]))
    for i in range(1, num_labels + 1):
        theta = np.zeros((1, X.shape[1]))
        y_i = np.array([1 if label == i else 0 for label in y]).reshape(X.shape[0], 1)
        fmin = minimize(fun=cost, x0=theta, args=(X, y_i, lr), method='TNC', jac=gradient)
        all_theta[i-1, :] = fmin.x
    return all_theta
```
```python
all_theta = one_vs_all(X, y, 10, 1)
all_theta
```
array([[-5.00990434e+00, 0.00000000e+00, 0.00000000e+00, ...,
1.30011051e-02, 2.86538705e-07, 0.00000000e+00],
[-5.71350473e+00, 0.00000000e+00, 0.00000000e+00, ...,
6.86412447e-02, -7.09121713e-03, 0.00000000e+00],
[-8.08044687e+00, 0.00000000e+00, 0.00000000e+00, ...,
-2.56458615e-04, -1.07383182e-06, 0.00000000e+00],
...,
[-1.30853787e+01, 0.00000000e+00, 0.00000000e+00, ...,
-5.37387945e+00, 6.20602454e-01, 0.00000000e+00],
[-8.81420750e+00, 0.00000000e+00, 0.00000000e+00, ...,
-2.68709952e-01, 1.66652812e-02, 0.00000000e+00],
[-6.71877525e+00, 0.00000000e+00, 0.00000000e+00, ...,
5.14661140e-04, 2.18877575e-05, 0.00000000e+00]])
1.3.4 Evaluating the Results
```python
def predict_all(X, all_theta):
    X = np.insert(X, 0, 1, axis=1)
    h = sigmoid(X @ all_theta.T)
    h_argmax = np.argmax(h, axis=1)
    return h_argmax + 1
```
```python
y_pred = predict_all(X, all_theta)
print(f'accuracy:{np.sum(y_pred==y.flatten())/len(y_pred)*100}%')
```
accuracy:97.42%
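As an optional extra (not in the original notebook), the overall accuracy can be broken down per class to see which digits the one-vs-all classifier handles worst:

```python
# Per-class accuracy of the one-vs-all classifier (illustrative check).
y_true = y.flatten()
for k in range(1, 11):
    mask = (y_true == k)
    print(f'label {k}: {np.mean(y_pred[mask] == k) * 100:.2f}%')
```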
2 Neural Networks
2.1 Forward Propagation
```python
data = loadmat('ex3weights.mat')
data
```
{'Theta1':...
'Theta2':...
'__globals__': [],
'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Tue Oct 18 14:57:02 2011',
'__version__': '1.0'}
```python
def serialize(theta1, theta2):
    return np.append(theta1.flatten(), theta2.flatten())

def deserialize(params):
    # relies on the globals input_size, hidden_size and out_size defined in section 2.2.3
    theta1 = params[:(input_size+1)*hidden_size].reshape((hidden_size, -1))
    theta2 = params[(input_size+1)*hidden_size:].reshape((out_size, -1))
    return theta1, theta2
```
```python
def forward(X, params):
    theta1, theta2 = deserialize(params)
    a1 = np.insert(X, 0, 1, axis=1)
    z2 = a1 @ theta1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, 1, axis=1)
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
```
```python
params = serialize(data['Theta1'], data['Theta2'])
_, _, _, _, res = forward(X, params)
y_pred = np.argmax(res, axis=1) + 1
print(f'accuracy:{np.sum(y_pred==y.flatten())/len(y_pred)*100}%')
```
accuracy:97.52%
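Before moving on, a quick sanity check of the intermediate shapes can be helpful (not part of the original code; it assumes the globals `input_size=400`, `hidden_size=25`, `out_size=10` from section 2.2.3 are already defined, since `deserialize` depends on them):

```python
# Verify the layer shapes produced by forward() for the 400-25-10 architecture.
a1, z2, a2, z3, h = forward(X, params)
assert a1.shape == (X.shape[0], 401)  # input plus bias column
assert z2.shape == (X.shape[0], 25)   # hidden-layer pre-activation
assert a2.shape == (X.shape[0], 26)   # hidden activation plus bias column
assert h.shape == (X.shape[0], 10)    # one output per class
```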
2.2 Backpropagation
2.2.1 Cost Function
```python
def cost(params, X, Y):
    m = X.shape[0]
    a1, z2, a2, z3, h = forward(X, params)
    a = -Y * np.log(h)
    b = (1 - Y) * np.log(1 - h)
    return (a - b).sum() / m
```
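With Y the one-hot label matrix and h the network output, this is the usual multi-label cross-entropy summed over all K output units:

$$
J = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Big[-Y_{ik}\log h_{ik} - (1-Y_{ik})\log(1-h_{ik})\Big]
$$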
```python
def reg_cost(params, X, Y, reg):
    theta1, theta2 = deserialize(params)  # unpack the weights before summing them
    r1 = np.sum(np.power(theta1[:, 1:], 2))
    r2 = np.sum(np.power(theta2[:, 1:], 2))
    return cost(params, X, Y) + reg * (r1 + r2) / (2 * X.shape[0])
```
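The regularized cost adds the squared weights of both layers, excluding the bias columns:

$$
J_{reg} = J + \frac{\lambda}{2m}\left[\sum_{j,k}\big(\Theta^{(1)}_{jk}\big)^{2} + \sum_{j,k}\big(\Theta^{(2)}_{jk}\big)^{2}\right]
$$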
2.2.2 Sigmoid Gradient
```python
def sigmoid_grad(z):
    s = sigmoid(z)
    return s * (1 - s)
```
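This follows from differentiating the sigmoid directly:

$$
g'(z) = \frac{d}{dz}\,\frac{1}{1+e^{-z}} = \frac{e^{-z}}{(1+e^{-z})^{2}} = g(z)\big(1-g(z)\big)
$$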
2.2.3 Random Initialization
```python
epsilon_init = 0.12
input_size = 400
hidden_size = 25
out_size = 10
weight_num = (input_size+1)*hidden_size + (hidden_size+1)*out_size
# initialize all weights uniformly in [-epsilon_init, epsilon_init]
W = np.random.rand(weight_num) * 2 * epsilon_init - epsilon_init
# one-hot encode the labels: column k-1 corresponds to label k
Y = np.array([[1 if y[i] == x else 0 for x in range(1, 11)] for i in range(len(y))])
```
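As a side note, the one-hot matrix `Y` can also be built with `np.eye` instead of the nested list comprehension; this is an equivalent vectorized alternative, not part of the original code:

```python
# Equivalent one-hot encoding: row i has a 1 in column y[i]-1 (labels run from 1 to 10).
Y_alt = np.eye(10)[y.flatten() - 1]
assert (Y_alt == Y).all()
```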
2.2.4 Backpropagation
```python
def backprop(params, X, Y):
    m = X.shape[0]
    theta1, theta2 = deserialize(params)
    a1, z2, a2, z3, h = forward(X, params)
    J = cost(params, X, Y)
    d3 = h - Y
    d2 = d3 @ theta2[:, 1:] * sigmoid_grad(z2)
    D2 = (d3.T @ a2) / m
    D1 = (d2.T @ a1) / m
    grad = serialize(D1, D2)
    return J, grad
```
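In matrix form the code implements the standard backpropagation updates, with the bias column of Θ² dropped when propagating the error back to the hidden layer:

$$
\delta^{(3)} = h - Y,\qquad
\delta^{(2)} = \big(\delta^{(3)}\,\Theta^{(2)}_{:,1:}\big) \odot g'\big(z^{(2)}\big),\qquad
D^{(l)} = \frac{1}{m}\,\big(\delta^{(l+1)}\big)^{T} a^{(l)}
$$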
```python
def reg_backprop(params, X, Y, reg):
    theta1, theta2 = deserialize(params)
    _, D = backprop(params, X, Y)
    D1, D2 = deserialize(D)
    D1[:, 1:] += (reg * theta1[:, 1:]) / len(X)
    D2[:, 1:] += (reg * theta2[:, 1:]) / len(X)
    J = reg_cost(params, X, Y, reg)
    return J, serialize(D1, D2)
```
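The regularized gradients simply add λ/m times the corresponding weights, again leaving the bias columns untouched:

$$
D^{(l)}_{:,j} \;\mathrel{+}=\; \frac{\lambda}{m}\,\Theta^{(l)}_{:,j}\qquad\text{for } j \ge 1
$$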
2.2.5 Computing the Parameters
```python
from scipy.optimize import minimize

fmin = minimize(fun=backprop, x0=W, args=(X, Y), method='TNC', jac=True, options={'maxiter': 250})
_, _, _, _, h = forward(X, fmin.x)
y_pred = np.argmax(h, axis=1) + 1
print(f'accuracy:{np.sum(y_pred==y.flatten())/len(y_pred)*100}%')
```
accuracy:99.88%
```python
fmin = minimize(fun=reg_backprop, x0=W, args=(X, Y, 1), method='TNC', jac=True, options={'maxiter': 250})
_, _, _, _, h = forward(X, fmin.x)
y_pred = np.argmax(h, axis=1) + 1
print(f'accuracy:{np.sum(y_pred==y.flatten())/len(y_pred)*100}%')
```
accuracy:95.72%