Part 1 - Multi-class Classification and Neural Networks Exercise
1 Multi-class Classification
1.1 Loading the Data
```python
import pandas as pd
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt  # used below for plotting the digit images
```
```python
data = loadmat('ex3data1.mat')
data
```
{'X': array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
'__globals__': [],
'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
'__version__': '1.0',
'y': array([[10],
[10],
[10],
...,
[ 9],
[ 9],
[ 9]], dtype=uint8)}
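The file holds 5000 training examples: each row of `X` is a flattened 20×20 grayscale image, and `y` holds the labels 1–10, with 10 standing for the digit 0. A quick shape check (not part of the original notebook):

```python
# Confirm the expected dimensions of the training set.
print(data['X'].shape)  # (5000, 400): 5000 images, 20x20 = 400 pixels each
print(data['y'].shape)  # (5000, 1): labels 1..10, where 10 represents the digit 0
```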
1.2 Visualizing the Data
```python
X = data['X']
y = data['y']
i = 1616
image = X[i, :]
plt.matshow(image.reshape((20, 20)), cmap='gray_r')
plt.show()
y.flatten()[i]
```
(output: the 20×20 grayscale image of training example 1616, followed by its label)
1.3 Vectorized Logistic Regression
1.3.1 Cost Function
```python
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
```
```python
def cost(theta, X, y, lr):
    # lr is accepted only so the signature matches gradient's; this cost is unregularized
    a = -y.T @ np.log(sigmoid(X @ theta.T))
    b = (1 - y).T @ np.log(1 - sigmoid(X @ theta.T))
    return float((a - b) / X.shape[0])
```
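Written out, with m training examples and g denoting the sigmoid, this computes the standard (unregularized) cross-entropy cost in vector form:

$$
J(\theta) = \frac{1}{m}\left[-y^{T}\log\big(g(X\theta)\big) - (1-y)^{T}\log\big(1-g(X\theta)\big)\right]
$$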
1.3.2 Gradient Computation
```python
def gradient(theta, X, y, lr):
    theta = theta.reshape((1, X.shape[1]))
    return (X.T @ (sigmoid(X @ theta.T) - y)).T / X.shape[0]
```
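The corresponding vectorized gradient, again without a regularization term, is

$$
\nabla_{\theta} J(\theta) = \frac{1}{m}\,X^{T}\big(g(X\theta) - y\big)
$$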
1.3.3 Fitting the Parameters
```python
from scipy.optimize import minimize

def one_vs_all(X, y, num_labels, lr):
    X = np.insert(X, 0, 1, axis=1)
    all_theta = np.zeros((num_labels, X.shape[1]))
    for i in range(1, num_labels + 1):
        theta = np.zeros((1, X.shape[1]))
        y_i = np.array([1 if label == i else 0 for label in y]).reshape(X.shape[0], 1)
        fmin = minimize(fun=cost, x0=theta, args=(X, y_i, lr), method='TNC', jac=gradient)
        all_theta[i-1, :] = fmin.x
    return all_theta
```
```python
all_theta = one_vs_all(X, y, 10, 1)
all_theta
```
array([[-5.00990434e+00, 0.00000000e+00, 0.00000000e+00, ...,
1.30011051e-02, 2.86538705e-07, 0.00000000e+00],
[-5.71350473e+00, 0.00000000e+00, 0.00000000e+00, ...,
6.86412447e-02, -7.09121713e-03, 0.00000000e+00],
[-8.08044687e+00, 0.00000000e+00, 0.00000000e+00, ...,
-2.56458615e-04, -1.07383182e-06, 0.00000000e+00],
...,
[-1.30853787e+01, 0.00000000e+00, 0.00000000e+00, ...,
-5.37387945e+00, 6.20602454e-01, 0.00000000e+00],
[-8.81420750e+00, 0.00000000e+00, 0.00000000e+00, ...,
-2.68709952e-01, 1.66652812e-02, 0.00000000e+00],
[-6.71877525e+00, 0.00000000e+00, 0.00000000e+00, ...,
5.14661140e-04, 2.18877575e-05, 0.00000000e+00]])
1.3.4 Evaluating the Results
```python
def predict_all(X, all_theta):
    X = np.insert(X, 0, 1, axis=1)
    h = sigmoid(X @ all_theta.T)
    h_argmax = np.argmax(h, axis=1)
    return h_argmax + 1
```
```python
y_pred = predict_all(X, all_theta)
print(f'accuracy:{np.sum(y_pred==y.flatten())/len(y_pred)*100}%')
```
accuracy:97.42%
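As an optional extra (not in the original notebook), the overall accuracy can be broken down per class to see which digits the one-vs-all classifier handles worst:

```python
# Per-class accuracy of the one-vs-all classifier (illustrative check).
y_true = y.flatten()
for k in range(1, 11):
    mask = (y_true == k)
    print(f'label {k}: {np.mean(y_pred[mask] == k) * 100:.2f}%')
```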
2 Neural Networks
2.1 Forward Propagation
```python
data = loadmat('ex3weights.mat')
data
```
{'Theta1':...
'Theta2':...
'__globals__': [],
'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Tue Oct 18 14:57:02 2011',
'__version__': '1.0'}
```python
def serialize(theta1, theta2):
    return np.append(theta1.flatten(), theta2.flatten())

def deserialize(params):
    # relies on the globals input_size, hidden_size and out_size defined in section 2.2.3
    theta1 = params[:(input_size+1)*hidden_size].reshape((hidden_size, -1))
    theta2 = params[(input_size+1)*hidden_size:].reshape((out_size, -1))
    return theta1, theta2
```
```python
def forward(X, params):
    theta1, theta2 = deserialize(params)
    a1 = np.insert(X, 0, 1, axis=1)
    z2 = a1 @ theta1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, 1, axis=1)
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
```
```python
params = serialize(data['Theta1'], data['Theta2'])
_, _, _, _, res = forward(X, params)
y_pred = np.argmax(res, axis=1) + 1
print(f'accuracy:{np.sum(y_pred==y.flatten())/len(y_pred)*100}%')
```
accuracy:97.52%
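Before moving on, a quick sanity check of the intermediate shapes can be helpful (not part of the original code; it assumes the globals `input_size=400`, `hidden_size=25`, `out_size=10` from section 2.2.3 are already defined, since `deserialize` depends on them):

```python
# Verify the layer shapes produced by forward() for the 400-25-10 architecture.
a1, z2, a2, z3, h = forward(X, params)
assert a1.shape == (X.shape[0], 401)  # input plus bias column
assert z2.shape == (X.shape[0], 25)   # hidden-layer pre-activation
assert a2.shape == (X.shape[0], 26)   # hidden activation plus bias column
assert h.shape == (X.shape[0], 10)    # one output per class
```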
2.2 Backpropagation
2.2.1 Cost Function
```python
def cost(params, X, Y):
    m = X.shape[0]
    a1, z2, a2, z3, h = forward(X, params)
    a = -Y * np.log(h)
    b = (1 - Y) * np.log(1 - h)
    return (a - b).sum() / m
```
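With Y the one-hot label matrix and h the network output, this is the usual multi-label cross-entropy summed over all K output units:

$$
J = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Big[-Y_{ik}\log h_{ik} - (1-Y_{ik})\log(1-h_{ik})\Big]
$$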
```python
def reg_cost(params, X, Y, reg):
    theta1, theta2 = deserialize(params)  # unpack the weights before summing them
    r1 = np.sum(np.power(theta1[:, 1:], 2))
    r2 = np.sum(np.power(theta2[:, 1:], 2))
    return cost(params, X, Y) + reg * (r1 + r2) / (2 * X.shape[0])
```
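The regularized cost adds the squared weights of both layers, excluding the bias columns:

$$
J_{reg} = J + \frac{\lambda}{2m}\left[\sum_{j,k}\big(\Theta^{(1)}_{jk}\big)^{2} + \sum_{j,k}\big(\Theta^{(2)}_{jk}\big)^{2}\right]
$$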
2.2.2 Sigmoid Gradient
```python
def sigmoid_grad(z):
    s = sigmoid(z)
    return s * (1 - s)
```
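This follows from differentiating the sigmoid directly:

$$
g'(z) = \frac{d}{dz}\,\frac{1}{1+e^{-z}} = \frac{e^{-z}}{(1+e^{-z})^{2}} = g(z)\big(1-g(z)\big)
$$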
2.2.3 Random Initialization
```python
epsilon_init = 0.12
input_size = 400
hidden_size = 25
out_size = 10
weight_num = (input_size+1)*hidden_size + (hidden_size+1)*out_size
# initialize all weights uniformly in [-epsilon_init, epsilon_init]
W = np.random.rand(weight_num) * 2 * epsilon_init - epsilon_init
# one-hot encode the labels: column k-1 corresponds to label k
Y = np.array([[1 if y[i] == x else 0 for x in range(1, 11)] for i in range(len(y))])
```
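As a side note, the one-hot matrix `Y` can also be built with `np.eye` instead of the nested list comprehension; this is an equivalent vectorized alternative, not part of the original code:

```python
# Equivalent one-hot encoding: row i has a 1 in column y[i]-1 (labels run from 1 to 10).
Y_alt = np.eye(10)[y.flatten() - 1]
assert (Y_alt == Y).all()
```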
2.2.4 Backpropagation
```python
def backprop(params, X, Y):
    m = X.shape[0]
    theta1, theta2 = deserialize(params)
    a1, z2, a2, z3, h = forward(X, params)
    J = cost(params, X, Y)
    d3 = h - Y
    d2 = d3 @ theta2[:, 1:] * sigmoid_grad(z2)
    D2 = (d3.T @ a2) / m
    D1 = (d2.T @ a1) / m
    grad = serialize(D1, D2)
    return J, grad
```
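In matrix form the code implements the standard backpropagation updates, with the bias column of Θ² dropped when propagating the error back to the hidden layer:

$$
\delta^{(3)} = h - Y,\qquad
\delta^{(2)} = \big(\delta^{(3)}\,\Theta^{(2)}_{:,1:}\big) \odot g'\big(z^{(2)}\big),\qquad
D^{(l)} = \frac{1}{m}\,\big(\delta^{(l+1)}\big)^{T} a^{(l)}
$$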
```python
def reg_backprop(params, X, Y, reg):
    theta1, theta2 = deserialize(params)
    _, D = backprop(params, X, Y)
    D1, D2 = deserialize(D)
    D1[:, 1:] += (reg * theta1[:, 1:]) / len(X)
    D2[:, 1:] += (reg * theta2[:, 1:]) / len(X)
    J = reg_cost(params, X, Y, reg)
    return J, serialize(D1, D2)
```
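The regularized gradients simply add λ/m times the corresponding weights, again leaving the bias columns untouched:

$$
D^{(l)}_{:,j} \;\mathrel{+}=\; \frac{\lambda}{m}\,\Theta^{(l)}_{:,j}\qquad\text{for } j \ge 1
$$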
2.2.5 Computing the Parameters
```python
from scipy.optimize import minimize

fmin = minimize(fun=backprop, x0=W, args=(X, Y), method='TNC', jac=True, options={'maxiter': 250})
_, _, _, _, h = forward(X, fmin.x)
y_pred = np.argmax(h, axis=1) + 1
print(f'accuracy:{np.sum(y_pred==y.flatten())/len(y_pred)*100}%')
```
accuracy:99.88%
```python
fmin = minimize(fun=reg_backprop, x0=W, args=(X, Y, 1), method='TNC', jac=True, options={'maxiter': 250})
_, _, _, _, h = forward(X, fmin.x)
y_pred = np.argmax(h, axis=1) + 1
print(f'accuracy:{np.sum(y_pred==y.flatten())/len(y_pred)*100}%')
```
accuracy:95.72%