PCA的主要方法是:奇异值分解。
具体内容见如下链接:
https://www.cnblogs.com/leftnoteasy/archive/2011/01/19/svd-and-applications.html
数据集
链接:https://pan.baidu.com/s/1ObxOER_eXsfZa4qQNHxUsw
提取码:4zga
代码
import numpy as np import pandas as pd import scipy.io as spio import matplotlib.pyplot as plt from scipy import misc # 图片操作 def featureNormalize(X): n = X.shape[1] mu = np.mean(X,axis=0) sigma = np.std(X,axis=0) for i in range(n): X[:,i] = (X[:,i] - mu[i]) / sigma[i] return X,mu,sigma def plot_data_2d(X,marker): plt.plot(X[:,0],X[:,1],marker) return plt def drawline(plt,p1,p2,line_type): plt.plot(np.array([p1[0],p2[0]]),np.array([p1[1],p2[1]]),line_type) def projectData(X_norm,U,K): U_reduce = U[:,0:K] return np.dot(X_norm,U_reduce) def display_imageData(imgData): sum = 0 ‘‘‘ 显示100个数(若是一个一个绘制将会非常慢,可以将要画的图片整理好,放到一个矩阵中,显示这个矩阵即可) - 初始化一个二维数组 - 将每行的数据调整成图像的矩阵,放进二维数组 - 显示即可 ‘‘‘ m,n = imgData.shape width = np.int32(np.round(np.sqrt(n))) height = np.int32(n/width); rows_count = np.int32(np.floor(np.sqrt(m))) cols_count = np.int32(np.ceil(m/rows_count)) pad = 1 display_array = -np.ones((pad+rows_count*(height+pad),pad+cols_count*(width+pad))) for i in range(rows_count): for j in range(cols_count): max_val = np.max(np.abs(imgData[sum,:])) display_array[pad+i*(height+pad):pad+i*(height+pad)+height,pad+j*(width+pad):pad+j*(width+pad)+width] = imgData[sum,:].reshape(height,width,order="F")/max_val # order=F指定以列优先,在matlab中是这样的,python中需要指定,默认以行 sum += 1 plt.figure(figsize=(10,8)) plt.imshow(display_array,cmap=‘gray‘) #显示灰度图像 plt.axis(‘off‘) plt.show() def recoverData(Z,U,K): X_rec = np.zeros((Z.shape[0],U.shape[0])) U_recude = U[:,0:K] X_rec = np.dot(Z,np.transpose(U_recude)) # 还原数据(近似) return X_rec def PCA_Face(): print("加载图像数据") data = spio.loadmat("data_faces.mat") X = data[‘X‘] m,n = X.shape #print(pd.DataFrame(X)) display_imageData(X[0:100,:]) X_norm,mu,sigma = featureNormalize(X) Sigma = np.dot(np.transpose(X_norm),X_norm)/m # 求Sigma U,S,V = np.linalg.svd(Sigma) # 奇异值分解 display_imageData(np.transpose(U[:,0:100])) # 显示U的数据 K = 100 # 降维100维(原先是32*32=1024维的) Z = projectData(X_norm, U, K) print (u‘投影之后Z向量的大小:%d %d‘ %Z.shape) print (u‘显示降维之后的数据......‘) X_rec = recoverData(Z, U, K) # 恢复数据 display_imageData(X_rec[0:100,:]) #数据降维 def PCA_2D(): data = spio.loadmat("pcadata.mat") X = data[‘X‘] #print(X) m,n = X.shape #画图 plt = plot_data_2d(X,‘o‘) plt.show() X_copy = X.copy() X_norm,mu,sigma = featureNormalize(X_copy) #print(X_norm,mu,sigma) #plot_data_2d(X_norm,‘o‘) #画归一化数据 #plt.show() Sigma = np.dot(np.transpose(X_norm),X_norm) / m U,S,V = np.linalg.svd(Sigma) #print(U,S,V) plt = plot_data_2d(X,‘o‘) drawline(plt, mu, mu+S[0]*(U[:,0]), ‘r--‘) plt.show() K = 1 #设置降维维数, 这里是降了 1 维 Z = projectData(X_norm,U,K) #投影 #print(Z) #能观察到数据以及是1维的了, 说明成功将数据降到了一维 # 恢复数据 X_rec = recoverData(Z,U,K) # 恢复 此处的 U 等于 V的转置 plt = plot_data_2d(X_norm,‘bo‘) plot_data_2d(X_rec,‘ro‘) for i in range(X_norm.shape[0]): drawline(plt, X_norm[i,:], X_rec[i,:], ‘--k‘) plt.axis(‘square‘) plt.show() if __name__ == "__main__": PCA_2D() PCA_Face()
原文:https://www.cnblogs.com/boniface/p/12285077.html