首页 > 编程语言 > 详细

Python实现PCA

时间:2020-02-08 21:26:44      阅读:101      评论:0      收藏:0      [点我收藏+]

PCA的主要方法是:奇异值分解。

具体内容见如下链接:

https://www.cnblogs.com/leftnoteasy/archive/2011/01/19/svd-and-applications.html

数据集

链接:https://pan.baidu.com/s/1ObxOER_eXsfZa4qQNHxUsw
提取码:4zga

代码

import numpy as np
import pandas as pd
import scipy.io as spio
import matplotlib.pyplot as plt
from scipy import misc      # 图片操作

def featureNormalize(X):
    n = X.shape[1]
    mu = np.mean(X,axis=0)
    sigma = np.std(X,axis=0)
    
    for i in range(n):
        X[:,i] = (X[:,i] - mu[i]) / sigma[i]
    return X,mu,sigma

def plot_data_2d(X,marker):
    plt.plot(X[:,0],X[:,1],marker) 
    return plt

def drawline(plt,p1,p2,line_type):
    plt.plot(np.array([p1[0],p2[0]]),np.array([p1[1],p2[1]]),line_type) 

def projectData(X_norm,U,K):
    U_reduce = U[:,0:K]
    return np.dot(X_norm,U_reduce)

def display_imageData(imgData):
    sum = 0
    ‘‘‘
    显示100个数(若是一个一个绘制将会非常慢,可以将要画的图片整理好,放到一个矩阵中,显示这个矩阵即可)
    - 初始化一个二维数组
    - 将每行的数据调整成图像的矩阵,放进二维数组
    - 显示即可
    ‘‘‘
    m,n = imgData.shape
    width = np.int32(np.round(np.sqrt(n)))
    height = np.int32(n/width);
    rows_count = np.int32(np.floor(np.sqrt(m)))
    cols_count = np.int32(np.ceil(m/rows_count))
    pad = 1
    display_array = -np.ones((pad+rows_count*(height+pad),pad+cols_count*(width+pad)))
    for i in range(rows_count):
        for j in range(cols_count):
            max_val = np.max(np.abs(imgData[sum,:]))
            display_array[pad+i*(height+pad):pad+i*(height+pad)+height,pad+j*(width+pad):pad+j*(width+pad)+width] = imgData[sum,:].reshape(height,width,order="F")/max_val    # order=F指定以列优先,在matlab中是这样的,python中需要指定,默认以行
            sum += 1
    plt.figure(figsize=(10,8))        
    plt.imshow(display_array,cmap=gray)   #显示灰度图像
    plt.axis(off)
    plt.show()

def recoverData(Z,U,K):
    X_rec = np.zeros((Z.shape[0],U.shape[0]))
    U_recude = U[:,0:K]
    X_rec = np.dot(Z,np.transpose(U_recude))  # 还原数据(近似)
    return X_rec

def PCA_Face():
    print("加载图像数据")
    data = spio.loadmat("data_faces.mat")
    X = data[X]
    m,n = X.shape
    #print(pd.DataFrame(X))
    display_imageData(X[0:100,:])
    
    X_norm,mu,sigma = featureNormalize(X)
    Sigma = np.dot(np.transpose(X_norm),X_norm)/m  # 求Sigma
    U,S,V = np.linalg.svd(Sigma)                   # 奇异值分解
    display_imageData(np.transpose(U[:,0:100]))     # 显示U的数据
    
    K = 100    # 降维100维(原先是32*32=1024维的)
    Z = projectData(X_norm, U, K)
    print (u投影之后Z向量的大小:%d %d %Z.shape)
    
    print (u显示降维之后的数据......)
    X_rec = recoverData(Z, U, K) # 恢复数据
    display_imageData(X_rec[0:100,:])

#数据降维
def PCA_2D():
    data = spio.loadmat("pcadata.mat")
    X = data[X]
    #print(X)
    m,n = X.shape
    #画图
    plt = plot_data_2d(X,o)
    plt.show()
    
    X_copy = X.copy()
    X_norm,mu,sigma = featureNormalize(X_copy)
    #print(X_norm,mu,sigma)
    #plot_data_2d(X_norm,‘o‘) #画归一化数据
    #plt.show()
    
    Sigma = np.dot(np.transpose(X_norm),X_norm) / m
    U,S,V = np.linalg.svd(Sigma)
    #print(U,S,V)
    
    plt = plot_data_2d(X,o)
    drawline(plt, mu, mu+S[0]*(U[:,0]), r--)
    plt.show()
    
    K = 1 #设置降维维数, 这里是降了 1 维
    Z = projectData(X_norm,U,K) #投影
    #print(Z)   #能观察到数据以及是1维的了, 说明成功将数据降到了一维
    
    # 恢复数据
    X_rec = recoverData(Z,U,K)    # 恢复   此处的 U 等于 V的转置
    plt = plot_data_2d(X_norm,bo)
    plot_data_2d(X_rec,ro)
    for i in range(X_norm.shape[0]):
        drawline(plt, X_norm[i,:], X_rec[i,:], --k)
    plt.axis(square)
    plt.show()  
    
    
    
if __name__ == "__main__":
    PCA_2D()
    PCA_Face()

 

Python实现PCA

原文:https://www.cnblogs.com/boniface/p/12285077.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!