
Part 1: Multi-class Classification and Neural Networks Exercise

Multi-class Classification

1.1 Reading the Data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat

data = loadmat('ex3data1.mat')  # load the .mat file with scipy.io's loadmat
data

{'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 '__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}
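A quick shape check before visualizing: in the course's ex3 data set, X holds 5000 flattened 20x20 grayscale digit images and the label 10 stands for the digit 0, so the expected shapes are as below.

data['X'].shape, data['y'].shape   # expect (5000, 400) and (5000, 1)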

1.2 Visualizing the Data

X = data['X']
y = data['y']
i = 1616
image = X[i, :]                                      # one 400-dimensional row vector
plt.matshow(image.reshape((20, 20)), cmap='gray_r')  # reshape back to a 20x20 image
plt.show()
y.flatten()[i]

1.3 Vectorized Logistic Regression

1.3.1 Cost Function

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(theta, X, y, lr):
    # lr is accepted to keep a uniform interface but is unused in this unregularized cost
    a = -y.T @ np.log(sigmoid(X @ theta.T))
    b = (1 - y).T @ np.log(1 - sigmoid(X @ theta.T))
    return float((a - b) / X.shape[0])
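In vector form this is the standard unregularized logistic-regression cost, with g the sigmoid and m = X.shape[0]:

J(\theta) = -\frac{1}{m}\Big[ y^{T}\log g(X\theta^{T}) + (1-y)^{T}\log\big(1 - g(X\theta^{T})\big) \Big]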

1.3.2 Gradient Computation

def gradient(theta, X, y, lr):
    theta = theta.reshape((1, X.shape[1]))
    # (n, m) @ (m, 1) -> (n, 1), transposed back to a row vector
    return (X.T @ (sigmoid(X @ theta.T) - y)).T / X.shape[0]
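This is the vectorized gradient of the cost above:

\frac{\partial J}{\partial \theta} = \frac{1}{m}\, X^{T}\big(g(X\theta^{T}) - y\big)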

1.3.3 Fitting the Parameters

from scipy.optimize import minimize

def one_vs_all(X, y, num_labels, lr):
    X = np.insert(X, 0, 1, axis=1)                  # prepend the bias column
    all_theta = np.zeros((num_labels, X.shape[1]))
    for i in range(1, num_labels + 1):              # train one binary classifier per label
        theta = np.zeros((1, X.shape[1]))
        y_i = np.array([1 if label == i else 0 for label in y]).reshape(X.shape[0], 1)
        fmin = minimize(fun=cost, x0=theta, args=(X, y_i, lr), method='TNC', jac=gradient)
        all_theta[i - 1, :] = fmin.x
    return all_theta

all_theta = one_vs_all(X, y, 10, 1)
all_theta

array([[-5.00990434e+00,  0.00000000e+00,  0.00000000e+00, ...,
         1.30011051e-02,  2.86538705e-07,  0.00000000e+00],
       [-5.71350473e+00,  0.00000000e+00,  0.00000000e+00, ...,
         6.86412447e-02, -7.09121713e-03,  0.00000000e+00],
       [-8.08044687e+00,  0.00000000e+00,  0.00000000e+00, ...,
        -2.56458615e-04, -1.07383182e-06,  0.00000000e+00],
       ...,
       [-1.30853787e+01,  0.00000000e+00,  0.00000000e+00, ...,
        -5.37387945e+00,  6.20602454e-01,  0.00000000e+00],
       [-8.81420750e+00,  0.00000000e+00,  0.00000000e+00, ...,
        -2.68709952e-01,  1.66652812e-02,  0.00000000e+00],
       [-6.71877525e+00,  0.00000000e+00,  0.00000000e+00, ...,
         5.14661140e-04,  2.18877575e-05,  0.00000000e+00]])

1.3.4 Evaluating the Results

def predict_all(X, all_theta):
    X = np.insert(X, 0, 1, axis=1)
    h = sigmoid(X @ all_theta.T)        # (5000, 10) per-class scores from the 10 classifiers
    h_argmax = np.argmax(h, axis=1)
    return h_argmax + 1                 # labels are 1-10, argmax is 0-based

y_pred = predict_all(X, all_theta)
print(f'accuracy:{np.sum(y_pred == y.flatten()) / len(y_pred) * 100}%')

accuracy:97.42%
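The 97.42% figure is accuracy on the training set itself. As an optional sanity check (not part of the original exercise), a per-class breakdown shows which digits the one-vs-all classifiers find hardest:

# hypothetical extra check: per-class training accuracy
for c in range(1, 11):
    mask = y.flatten() == c
    print(f'class {c}: {np.mean(y_pred[mask] == c) * 100:.2f}%')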

Neural Networks

2.1 Forward Propagation

# load the pre-trained weights
data = loadmat('ex3weights.mat')
data

{'Theta1': ...,
 'Theta2': ...,
 '__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Tue Oct 18 14:57:02 2011',
 '__version__': '1.0'}
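The weight arrays are truncated above; in this exercise Theta1 should be 25x401 and Theta2 should be 10x26 (one extra column each for the bias unit), which is quick to confirm:

data['Theta1'].shape, data['Theta2'].shape   # expect (25, 401) and (10, 26)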

# serialize / deserialize the weight matrices to and from one flat vector
def serialize(theta1, theta2):
    return np.append(theta1.flatten(), theta2.flatten())

def deserialize(params):
    # input_size, hidden_size and out_size are globals defined in section 2.2.3
    theta1 = params[:(input_size + 1) * hidden_size].reshape((hidden_size, -1))
    theta2 = params[(input_size + 1) * hidden_size:].reshape((out_size, -1))
    return theta1, theta2
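A round trip through this pair should reproduce the loaded matrices exactly. A minimal sketch of that check (mine, not part of the original code, and assuming the layer-size globals from section 2.2.3 are already defined):

# quick round-trip check: serialize then deserialize should be lossless
t1, t2 = deserialize(serialize(data['Theta1'], data['Theta2']))
(t1 == data['Theta1']).all(), (t2 == data['Theta2']).all()   # expect (True, True)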
def forward(X, params):
    theta1, theta2 = deserialize(params)
    a1 = np.insert(X, 0, 1, axis=1)    # input layer plus bias unit
    z2 = a1 @ theta1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, 1, axis=1)   # hidden layer plus bias unit
    z3 = a2 @ theta2.T
    h = sigmoid(z3)                    # (5000, 10) output activations
    return a1, z2, a2, z3, h
# results
params = serialize(data['Theta1'], data['Theta2'])
_, _, _, _, res = forward(X, params)
y_pred = np.argmax(res, axis=1) + 1
print(f'accuracy:{np.sum(y_pred == y.flatten()) / len(y_pred) * 100}%')

accuracy:97.52%

2.2 Backpropagation

2.2.1 Cost Function

def cost(params, X, Y):
    # redefines the cost from 1.3.1: params now packs both weight matrices
    m = X.shape[0]
    a1, z2, a2, z3, h = forward(X, params)
    a = -Y * np.log(h)            # element-wise product of (5000, 10) arrays
    b = (1 - Y) * np.log(1 - h)
    return (a - b).sum() / m
# regularized cost function
def reg_cost(params, X, Y, reg):
    theta1, theta2 = deserialize(params)
    r1 = np.sum(np.power(theta1[:, 1:], 2))   # bias columns are excluded from the penalty
    r2 = np.sum(np.power(theta2[:, 1:], 2))
    return cost(params, X, Y) + reg * (r1 + r2) / (2 * X.shape[0])

2.2.2 Sigmoid Gradient

def sigmoid_grad(z):
    s = sigmoid(z)
    return s * (1 - s)
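This implements the standard identity for the sigmoid derivative:

g'(z) = \frac{d}{dz}\,\frac{1}{1 + e^{-z}} = \frac{e^{-z}}{(1 + e^{-z})^{2}} = g(z)\,\big(1 - g(z)\big)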

2.2.3 Random Initialization

# units per layer: 400+1 -> 25+1 -> 10
epsilon_init = 0.12
input_size = 400
hidden_size = 25
out_size = 10
weight_num = (input_size + 1) * hidden_size + (hidden_size + 1) * out_size
W = np.random.rand(weight_num) * 2 * epsilon_init - epsilon_init   # uniform on [-0.12, 0.12]
Y = np.array([[1 if y[i] == x else 0 for x in range(1, 11)] for i in range(len(y))])   # one-hot labels
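A common heuristic for the initialization range (the course handout suggests it) is epsilon_init = sqrt(6)/sqrt(L_in + L_out), which comes out near 0.12 for the 400-unit input and 25-unit hidden layer. The one-hot encoding can also be written as a single broadcast comparison, equivalent to the list comprehension above:

Y = (y == np.arange(1, 11)).astype(int)   # (5000, 1) == (10,) broadcasts to (5000, 10)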

2.2.4 Backpropagation

def backprop(params, X, Y):
    m = X.shape[0]
    theta1, theta2 = deserialize(params)
    a1, z2, a2, z3, h = forward(X, params)
    J = cost(params, X, Y)

    d3 = h - Y                                    # output-layer error, (5000, 10)
    d2 = d3 @ theta2[:, 1:] * sigmoid_grad(z2)    # hidden-layer error, bias column dropped
    D2 = (d3.T @ a2) / m
    D1 = (d2.T @ a1) / m

    grad = serialize(D1, D2)
    return J, grad
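Before training, it is worth verifying the analytic gradient against a numerical one. This is a minimal sketch of such a check (the gradient_check helper is hypothetical, not part of the exercise code): it perturbs a few random components of params and compares the two estimates, which should agree to several significant digits.

# hypothetical helper: numerical gradient check on a few random components
def gradient_check(params, X, Y, eps=1e-4, n=5):
    _, grad = backprop(params, X, Y)
    for j in np.random.choice(len(params), n, replace=False):
        e = np.zeros_like(params)
        e[j] = eps
        numeric = (cost(params + e, X, Y) - cost(params - e, X, Y)) / (2 * eps)
        print(f'analytic {grad[j]: .6e}   numeric {numeric: .6e}')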
def reg_backprop(params, X, Y, reg):
    theta1, theta2 = deserialize(params)
    _, D = backprop(params, X, Y)
    D1, D2 = deserialize(D)
    D1[:, 1:] += (reg * theta1[:, 1:]) / len(X)   # bias columns are not regularized
    D2[:, 1:] += (reg * theta2[:, 1:]) / len(X)
    J = reg_cost(params, X, Y, reg)
    return J, serialize(D1, D2)

2.2.5 Computing the Parameters

from scipy.optimize import minimize

fmin = minimize(fun=backprop, x0=W, args=(X, Y), method='TNC', jac=True, options={'maxiter': 250})
_, _, _, _, h = forward(X, fmin.x)
y_pred = np.argmax(h, axis=1) + 1
print(f'accuracy:{np.sum(y_pred == y.flatten()) / len(y_pred) * 100}%')

accuracy:99.88%

fmin = minimize(fun=reg_backprop, x0=W, args=(X, Y, 1), method='TNC', jac=True, options={'maxiter': 250})
_, _, _, _, h = forward(X, fmin.x)
y_pred = np.argmax(h, axis=1) + 1
print(f'accuracy:{np.sum(y_pred == y.flatten()) / len(y_pred) * 100}%')

accuracy:95.72%

Both figures are training-set accuracy, so the drop from 99.88% to 95.72% is expected: the regularization term penalizes large weights and deliberately trades some training fit for smoother parameters that should generalize better.