SVM人臉識别

SVM在中等次元的分類問題中，有較好的表現，其在某種程度上建構了一個簡單的網絡結構，類似于神經網絡中的RBF神經網絡。

人臉資料集是經典的分類和聚類問題中經常使用的資料集，次元相對不高，灰階圖像，這裡選用64*64的人臉圖像，将其reshape成1*64^2的一維數組，共40類樣本，每類10個。

通常在SVM解決較高次元問題時，需要将其适度降維，這裡選用傳統的線性降維方法PCA（KPCA同樣适用，隻是人臉資料集PCA已經夠用，不用換成更複雜的非線性降維）。

導入相關子產品；
擷取資料；
标準化（這裡可以不用，圖像資料一般已經是标準的）
比較降維到不同的次元下，各自的效果對比（這時SVM的參數不變，選用RBF核函數）
用網格搜尋最佳的pca參數和SVM參數，效果示範。

以下為實驗代碼子產品

（1）導入子產品

# -*-encoding:utf-8-*-
'''
created by zwg in 2017-03-01
'''
import numpy,time
from sklearn import datasets
from sklearn import svm
from sklearn import decomposition
from sklearn import manifold
from sklearn.cross_validation import train_test_split as tts
from sklearn import svm,neural_network
from sklearn.metrics import classification_report,precision_score,recall_score,f1_score
from sklearn import pipeline
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.grid_search import GridSearchCV
from matplotlib import pyplot
import matplotlib.colors as colors

（2）擷取資料

def get_data():
    """Load the Olivetti faces dataset and return its samples and labels.

    Returns:
        tuple: ``(data, target)`` read from the ``data`` and ``target``
        attributes of the object returned by
        ``datasets.fetch_olivetti_faces()``.
    """
    faces=datasets.fetch_olivetti_faces()
    return faces.data,faces.target

（3）PCA函數

def pca(x,n):
    """Fit a PCA on ``x`` and return the transformed samples.

    Args:
        x: Sample matrix handed to ``PCA.fit_transform``.
        n: Value forwarded to ``decomposition.PCA`` as ``n_components``.

    Returns:
        Whatever ``fit_transform`` returns for ``x``.
    """
    return decomposition.PCA(n_components=n).fit_transform(x)

（4）PCA & SVM（傳回得分系數）

def pca_svm(pca_n=10,svm_C=1):
    """Train an SVC on PCA-reduced data and score it on a held-out split.

    The samples from ``get_data()`` are split with ``random_state=33``. PCA is
    fitted on the training part only and then applied to the test part, so the
    test samples never influence the projection.

    Args:
        pca_n: ``n_components`` passed to ``decomposition.PCA``.
        svm_C: ``C`` passed to ``svm.SVC``.

    Returns:
        tuple: ``(ac, p, r, f1, t)`` where ``ac`` is the classifier's score on
        the test split, ``p`` and ``r`` are the weighted precision and recall,
        ``f1`` is the harmonic mean of ``p`` and ``r`` (0.0 when either is 0),
        and ``t`` is the elapsed wall-clock time in seconds, data loading
        included.
    """
    t1=time.time()
    data,target=get_data()
    # No scaling step: the samples are used exactly as loaded.
    x_train,x_test,y_train,y_test=tts(data,target,random_state=33)
    pca_learner=decomposition.PCA(n_components=pca_n)
    x_train=pca_learner.fit_transform(x_train)
    svm_learner=svm.SVC(C=svm_C)
    svm_learner.fit(x_train,y_train)
    # Reuse the projection learned on the training split.
    x_test_pre=pca_learner.transform(x_test)
    y_test_pre=svm_learner.predict(x_test_pre)
    ac=svm_learner.score(x_test_pre,y_test)
    p=precision_score(y_test,y_test_pre,average='weighted')
    r=recall_score(y_test,y_test_pre,average='weighted')
    # The harmonic mean is undefined when either term is 0; report 0.0
    # instead of dividing by zero.
    if p>0 and r>0:
        f1=2.0/(1.0/p+1.0/r)
    else:
        f1=0.0
    t=time.time()-t1
    return ac,p,r,f1,t

（5）PCA降維到不同次元下效果比較

def pca_svm_time_score_compare():
    """Plot SVM scores and run time against the number of PCA components.

    ``pca_svm`` is run once for every ``n_components`` in 10, 20, ..., 400.
    The upper subplot scatters the F1, recall, precision and accuracy values;
    the marker sizes of each series are that same series min-max scaled to
    the range [20, 30]. The lower subplot plots the elapsed time reported by
    each run. The figure is displayed with ``pyplot.show()``.
    """
    ac_scores=[]
    p_scores=[]
    r_scores=[]
    # Named f1_scores so the imported sklearn.metrics.f1_score is not shadowed.
    f1_scores=[]
    tt=[]
    # NOTE(review): the split inside pca_svm may leave fewer training samples
    # than the largest n requested here; some scikit-learn versions may
    # reject that - confirm against the installed version.
    steps=numpy.arange(10,410,10)
    for n in steps:
        ac,p,r,f1,t=pca_svm(pca_n=n)
        ac_scores.append(ac)
        p_scores.append(p)
        r_scores.append(r)
        f1_scores.append(f1)
        tt.append(t)

    stand=MinMaxScaler((20,30))

    def marker_sizes(scores):
        # The scaler needs a column vector; scatter() wants one size per
        # point, so flatten the scaled column again.
        column=numpy.array(scores).reshape((-1,1))
        return stand.fit_transform(column).ravel()

    pyplot.figure()
    pyplot.subplot(2,1,1)
    # Each series is sized by its own scaled values.
    pyplot.scatter(steps,f1_scores,label='f1-score',color='red',s=marker_sizes(f1_scores),alpha=0.7)
    pyplot.scatter(steps,r_scores,label='recall-score',color='blue',s=marker_sizes(r_scores),alpha=0.7)
    pyplot.scatter(steps,p_scores,label='precision-score',color='yellow',s=marker_sizes(p_scores),alpha=0.7)
    pyplot.scatter(steps,ac_scores,label='accuracy-score',color='purple',s=marker_sizes(ac_scores),alpha=0.7)
    pyplot.xlabel('n-components')
    pyplot.ylabel('score')
    pyplot.legend()
    pyplot.title('The Score Of SVM After PCA To N_components')
    pyplot.subplot(2,1,2)
    pyplot.plot(steps,tt,label='cost-time',color='black',marker='o')
    pyplot.legend()
    pyplot.xlabel('n-components')
    pyplot.ylabel('time')
    pyplot.show()

（6）網格搜尋pca和SVM最佳參數，并進行可視化

# pca before svm fitting is better
def pca_svm_pipeline():
    """Grid-search a PCA + SVC pipeline and draw every face by predicted label.

    The samples from ``get_data()`` are split with ``random_state=33`` and a
    ``GridSearchCV`` over ``pca__n_components`` (5, 15, ..., 195) and
    ``svm__C`` is fitted on the training part. The best parameters and a
    classification report for the test part are printed.

    The fitted search then predicts a label for every sample, and each sample
    is drawn as a 64x64 grayscale tile: tiles with the same predicted label
    share a horizontal offset and are stacked vertically in the order they are
    encountered. A red ``x`` is drawn on tiles whose predicted label differs
    from the true label. The figure is displayed with ``pyplot.show()``.
    """
    # Only C=1 is searched; extend this list (for example with
    # numpy.linspace(0.5,10,10)) to tune C as well.
    svm_C=[1]
    pca_n_components=numpy.arange(5,200,10)
    data,target=get_data()
    x_train,x_test,y_train,y_test=tts(data,target,random_state=33)
    pca_learner=decomposition.PCA()
    svm_learner=svm.SVC()
    pipe=pipeline.Pipeline([('pca',pca_learner),('svm',svm_learner)])
    gscv=GridSearchCV(pipe,
                      {'pca__n_components':pca_n_components,'svm__C':svm_C},n_jobs=-1)
    gscv.fit(x_train,y_train)
    y_test_pre=gscv.predict(x_test)
    report=classification_report(y_test,y_test_pre)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(gscv.best_params_)
    print(report)
    # Predict on the full data set (training samples included) for the plot.
    target_pre=gscv.predict(data)
    n_samples=data.shape[0]
    pyplot.figure()
    # L[k] counts how many tiles have already been drawn for predicted label k.
    L=numpy.zeros((40,))
    xx=numpy.linspace(0,1,64)+13
    yy=numpy.linspace(1,0,64)+13
    xx,yy=numpy.meshgrid(xx,yy)
    for i in range(n_samples):
        k=target_pre[i]
        g=L[k]
        L[k]+=1
        # Shift the unit tile horizontally by label and vertically by count.
        xx1=xx-k
        yy1=yy-g
        pyplot.contourf(xx1,yy1,data[i].reshape((64,64)),cmap='gray')
        if target[i]!=target_pre[i]:
            # Mark misclassified samples at the centre of their tile.
            pyplot.scatter(numpy.mean(xx1),numpy.mean(yy1),marker='x',c='red',s=40)
    pyplot.axis('off')
    # A boolean is used because the string 'off' is truthy and is not
    # reliably interpreted as "disable" across matplotlib versions.
    pyplot.grid(False)
    pyplot.title('PCA & SVM Recongnize Faces')
    pyplot.show()

（7）調用

if __name__=='__main__':
    # Run the grid-search demo (with its visualisation) first, then the
    # direct sweep over the number of PCA components.
    for demo in (pca_svm_pipeline,pca_svm_time_score_compare):
        demo()

結果：（1）降維至10~60多維時，效果最好。

SVM人臉識别

結果：（2）最佳參數以及分類結果：PCA降至35維，SVM參數C=1。

{'pca__n_components': 35, 'svm__C': 1}

precision recall f1-score support

0 1.00 0.80 0.89 5

1 1.00 1.00 1.00 1

3 1.00 0.67 0.80 3

4 1.00 1.00 1.00 1

5 1.00 1.00 1.00 1

6 1.00 1.00 1.00 3

7 0.75 1.00 0.86 3

8 1.00 1.00 1.00 1

9 0.67 1.00 0.80 2

10 1.00 1.00 1.00 1

11 1.00 1.00 1.00 1

12 0.50 0.50 0.50 2

13 1.00 1.00 1.00 1

14 1.00 1.00 1.00 3

15 1.00 0.50 0.67 2

17 1.00 1.00 1.00 2

18 1.00 1.00 1.00 2

19 1.00 1.00 1.00 3

20 1.00 1.00 1.00 2

21 1.00 1.00 1.00 2

22 0.67 1.00 0.80 2

23 1.00 0.50 0.67 2

24 1.00 1.00 1.00 4

25 1.00 1.00 1.00 1

26 1.00 1.00 1.00 4

27 1.00 1.00 1.00 4

28 1.00 1.00 1.00 2

29 1.00 1.00 1.00 4

30 1.00 1.00 1.00 6

31 1.00 1.00 1.00 1

32 1.00 1.00 1.00 4

33 1.00 1.00 1.00 4

34 1.00 1.00 1.00 4

35 1.00 1.00 1.00 3

36 1.00 1.00 1.00 5

37 1.00 1.00 1.00 2

38 1.00 1.00 1.00 4

39 0.75 1.00 0.86 3

avg / total 0.96 0.95 0.95 100

SVM人臉識别

SVM人臉識别

繼續閱讀

XGBoost Plotting API以及GBDT組合特征實踐 XGBoost Plotting API以及GBDT組合特征實踐

解碼器用于語義分割：資料依賴的解碼可以實作靈活的特征聚合

YAML簡介和PyYAML安全操作YAML支援的類型YAML的優點：yaml的基本文法python操作

2021-2025年中國運動療法（KT）帶行業市場供需與戰略研究報告

Small tricks

libsvm for python 安裝

學習軟體測試基礎測試第七天

Zeppelin 配置通路 REST APIApache Zeppelin Configuration REST API

【Torch】最簡潔logging使用指南

27. Remove Element(清單)題目代碼

Cloud Studio初體驗

使用 ctypes 進行 Python 和 C 的混合程式設計

【python】【資料處理】畫多元資料分布圖

【python】netconf協定對接管理裝置

「Python 網絡自動化」NETCONF —— Python 使用 NETCONF 管理配置 H3C 網絡裝置

在python中建立excel并寫入