# Load the 2-D K-means example set and show it as a scatter plot.
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# The array stored under key 'X' is wrapped in a DataFrame with columns X1/X2.
data = loadmat('ex7data2.mat')
data = pd.DataFrame(data['X'], columns=['X1', 'X2'])
data.plot('X1', 'X2', kind='scatter')
plt.show()
1.2 随机初始化
1 2
def random_init(X, K):
    """Pick K distinct rows of X at random to serve as initial centres.

    Args:
        X: pandas DataFrame (or any 2-D array-like) with one sample per row.
        K: number of rows to draw; sampling is without replacement, so
            pandas raises ValueError when K exceeds the number of rows.

    Returns:
        A NumPy array holding the K sampled rows.
    """
    # Array input is wrapped so the same pandas sampling path is used.
    frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
    return frame.sample(K).values
1.3 寻找距离X最近的聚类中心
1 2 3 4 5 6 7 8 9 10 11 12
def find_closest_center(X, u):
    """Assign every row of X to the nearest centre in u.

    Distance is the squared Euclidean distance.  Ties go to the
    lowest-numbered centre.  A centre whose distance is NaN is never
    selected; a row for which no centre gives a finite distance is
    labelled 0.

    Args:
        X: DataFrame or 2-D array-like, one sample per row.
        u: 2-D array of centres, one centre per row.

    Returns:
        A float array of length ``X.shape[0]`` holding centre indices.
    """
    points = np.asarray(X, dtype=float)
    centers = np.asarray(u, dtype=float)
    m = points.shape[0]
    if len(centers) == 0:
        # Nothing to compare against: keep the all-zero labelling.
        return np.zeros(m)

    # (m, 1, n) - (1, k, n) -> (m, k, n); summing the last axis gives the
    # squared distance from every sample to every centre.
    diff = points[:, np.newaxis, :] - centers[np.newaxis, :, :]
    sq_dist = np.sum(diff ** 2, axis=2)
    # argmin would pick a NaN entry, so NaN distances are pushed to +inf.
    sq_dist = np.where(np.isnan(sq_dist), np.inf, sq_dist)
    # No fixed upper bound is used, so arbitrarily distant points are still
    # assigned to their true nearest centre.
    return np.argmin(sq_dist, axis=1).astype(float)
1.4 计算聚类中心
1 2 3 4 5 6 7
def compute_center(X, c, k):
    """Return the mean of the samples assigned to each of the k clusters.

    Args:
        X: DataFrame (or 2-D array-like), one sample per row.
        c: per-sample cluster index, same length as X.
        k: number of clusters.

    Returns:
        A ``(k, n)`` float array whose row i is the column-wise mean of the
        rows of X labelled i.  A cluster with no members gets a NaN row.
    """
    frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
    labels = np.asarray(c)
    m, n = frame.shape
    u = np.zeros((k, n))
    for i in range(k):
        members = np.flatnonzero(labels == i)
        # Average the rows of the argument X, not a module-level variable.
        u[i] = frame.iloc[members, :].mean()
    return u
1.5 kmeans聚类
1 2 3 4 5 6 7 8 9
def kmeans(X, k, epoch=10):
    """Cluster X into k groups with at most `epoch` assign/update rounds.

    Each round assigns samples to their nearest centre and then recomputes
    the centres.  Rounds stop early once the assignment repeats, because
    every further round would reproduce the same centres and labels.

    Args:
        X: samples, passed unchanged to random_init, find_closest_center
            and compute_center.
        k: number of clusters.
        epoch: maximum number of rounds.

    Returns:
        A tuple ``(c, u)``: the labels computed against the final centres,
        and the final centres.
    """
    u = random_init(X, k)
    previous = None
    for _ in range(epoch):
        c = find_closest_center(X, u)
        if previous is not None and np.array_equal(c, previous):
            # Same labels as last round -> same centres; already converged.
            break
        u = compute_center(X, c, k)
        previous = c
    # Labels are always reported relative to the centres being returned.
    c = find_closest_center(X, u)
    return c, u
# Compress an image by clustering its pixel colours into 16 K-means centres.
# NOTE(review): `io` is not imported anywhere in the visible file; the
# imread/imshow calls look like skimage.io -- confirm and add the import.
pic = io.imread('bird_small.png')
io.imshow(pic)
# One row per pixel with three channels, scaled by 1/255.
data = pic.reshape(-1, 3) / 255

from sklearn.cluster import KMeans

# `n_jobs` is not passed: current scikit-learn releases of KMeans no longer
# accept that argument.
model = KMeans(n_clusters=16, n_init=100)
model.fit(data)
center = model.cluster_centers_
C = model.predict(data)
# Replace every pixel with its centre colour and restore the image shape.
pic_recovered = center[C].reshape(pic.shape)
plt.imshow(pic_recovered)
plt.show()
3 PCA
3.1 数据可视化
1 2 3 4 5 6 7
# Load the 2-D PCA example set and plot its two columns against each other.
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

data = loadmat('ex7data1.mat')
plt.scatter(data['X'][:, 0], data['X'][:, 1])
plt.show()
3.2 PCA算法数据压缩与还原
1 2 3 4 5 6 7
def pca(X, k):
    """Project X onto the first k singular directions of ``X.T @ X / m``.

    X is used exactly as given: no mean is subtracted here, so callers that
    want ``X.T @ X / m`` to be a covariance matrix must centre X beforehand.

    Args:
        X: 2-D array, one sample per row.
        k: number of leading columns of U to project onto.

    Returns:
        A tuple ``(Z, U)`` where ``Z = X @ U[:, :k]`` and U is the full
        matrix of left singular vectors.
    """
    m = X.shape[0]
    sigma = X.T @ X / m
    # Only the left singular vectors are used; the other outputs are dropped.
    U, _, _ = np.linalg.svd(sigma)
    return X @ U[:, :k], U


def recover_from_pca(Z, U, k):
    """Map a k-dimensional projection Z back to the original feature space.

    Args:
        Z: projected data, one sample per row, k columns.
        U: matrix of singular vectors as returned by pca.
        k: number of leading columns of U that produced Z.

    Returns:
        ``Z @ U[:, :k].T``, the approximation of the data that was projected.
    """
    return Z @ U[:, :k].T
1 2 3 4 5
# Reduce the 2-D example set to one component, map it back, and plot the
# reconstructed points.
X = data['X']
Z, U = pca(X, 1)
X_recovered = recover_from_pca(Z, U, 1)
plt.scatter(X_recovered[:, 0], X_recovered[:, 1])
plt.show()
4 PCA人脸图片压缩与还原
4.1 数据读取与可视化
1 2 3 4 5 6
# Load the face data set and display the row at index 3 as a 32x32 grey image.
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np

data = loadmat('ex7faces.mat')['X']
# NOTE(review): if the .mat file stores pixels column-major, the reshaped
# image will appear transposed -- confirm against the displayed figure.
plt.imshow(data[3, :].reshape(32, 32), cmap='gray')
plt.show()
4.2 PCA压缩与恢复
1 2 3 4 5
# Keep 100 components of the face data, reconstruct, and show the row at
# index 3 of the reconstruction.
X = data
Z, U = pca(X, 100)
X_recovered = recover_from_pca(Z, U, 100)
plt.imshow(X_recovered[3, :].reshape(32, 32), cmap='gray')
plt.show()
image-20210718174018954
4.3 使用sklearn中的PCA实现
1 2 3 4 5 6 7 8 9
# Same compress/reconstruct experiment using scikit-learn's PCA.
from scipy.io import loadmat
from sklearn.decomposition import PCA

data = loadmat('ex7faces.mat')['X']
sk_pca = PCA(n_components=100)
X = data
Z = sk_pca.fit_transform(X)
X_recover = sk_pca.inverse_transform(Z)
# Show the reconstruction produced here (X_recover), not the X_recovered
# array left over from the hand-written PCA section.
plt.imshow(X_recover[3, :].reshape(32, 32), cmap='gray')
plt.show()