Load the face dataset from sklearn
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people()
Running the second line above makes Python download the Labeled Faces in the Wild (LFW) dataset from the internet. The dataset is roughly 200 MB, and because of the Great Firewall the download is very slow and tends to fail.
Instead, download the dataset from Baidu Cloud; it is a .tgz archive.
Link: https://pan.baidu.com/s/1eySjV_1K2XYD5YYKCxiVEw
Extraction code: 3wut
Put the downloaded archive into C:\Users\Tim\scikit_learn_data\lfw_home (Tim here is my username), then run
faces = fetch_lfw_people()
again. This time it succeeds, and the output in Jupyter Notebook is as follows:
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=60)
print(faces.target_names)
print(faces.images.shape)
['Donald Rumsfeld' 'George W Bush' 'Gerhard Schroeder' 'Junichiro Koizumi' 'Tony Blair']
(964, 62, 47)
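As a side note (my own addition, not part of the original post), fetch_lfw_people also accepts a data_home argument pointing at a non-default data directory and a download_if_missing flag, which is handy when the archive was fetched manually:
# Hedged sketch: data_home is the parent folder of lfw_home; download_if_missing=False
# makes sklearn raise an error instead of trying to reach the network.
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(data_home=r'C:\Users\Tim\scikit_learn_data',
                         download_if_missing=False)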
# After the previous step, take a look at what the images actually look like
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    axi.set(xticks=[], yticks=[],
            xlabel=faces.target_names[faces.target[i]])
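For orientation (standard scikit-learn behaviour, not spelled out in the original): faces.images holds the pixel matrices used for plotting, while faces.data holds the same pixels flattened, which is the form the estimators below consume.
print(faces.images.shape)  # (n_samples, 62, 47) pixel matrices
print(faces.data.shape)    # (n_samples, 2914) = 62*47 flattened pixels fed to PCA/SVC
print(faces.target.shape)  # (n_samples,) integer labels indexing faces.target_names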

Solving face recognition (Jupyter)
Face recognition is a classification problem, and since the SVM reigns supreme among classical machine-learning classifiers (deep learning aside), we use an SVM to train on the images.
# SVC: support vector classifier, for the classification problem
from sklearn.svm import SVC
# The images are high-dimensional, so reduce the dimensionality first
from sklearn.decomposition import PCA
# Pipeline to chain the two steps together
from sklearn.pipeline import make_pipeline
pca = PCA(n_components=150, whiten=True, random_state=42)
svc = SVC(kernel='rbf', class_weight='balanced')
model = make_pipeline(pca, svc)
As in the previous step, load the data and take a look at it at the same time.
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=60)
print(faces.target_names)
print(faces.images.shape)
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(3, 5)
for i, axi in enumerate(ax.flat):
    axi.imshow(faces.images[i], cmap='bone')
    axi.set(xticks=[], yticks=[],
            xlabel=faces.target_names[faces.target[i]])
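Now that the data is loaded, an optional sanity check (my own addition) is to fit a standalone PCA with the same settings as the pipeline's first step and see how much of the pixel variance 150 components retain before they are handed to the SVC:
# Uses a separate PCA object so the pipeline's own pca step is left untouched
pca_check = PCA(n_components=150, whiten=True, random_state=42).fit(faces.data)
print(pca_check.explained_variance_ratio_.sum())  # fraction of variance kept by 150 components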
Split the data into training and test sets
from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target,
                                                random_state=40)
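One optional refinement (my assumption, not used in the original post): because George W Bush has far more images than the other people, passing stratify=faces.target keeps the class proportions identical in the train and test splits.
# Note: re-splitting this way will change the exact numbers reported below
Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target,
                                                random_state=40, stratify=faces.target)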
# Train the model with a grid search over the SVC hyperparameters
from sklearn.model_selection import GridSearchCV
param_grid = {'svc__C': [1, 5, 10],
              'svc__gamma': [0.0001, 0.0005, 0.001]}
grid = GridSearchCV(model, param_grid)
%time grid.fit(Xtrain, ytrain)
GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('pca', PCA(copy=True, iterated_power='auto', n_components=150,
                                                   random_state=42, svd_solver='auto', tol=0.0, whiten=True)),
                                       ('svc', SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
                                                   decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
                                                   kernel='rbf', max_iter=-1, probability=False, random_state=None,
                                                   shrinking=True, tol=0.001, verbose=False))],
                                verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'svc__C': [1, 5, 10], 'svc__gamma': [0.0001, 0.0005, 0.001]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)
print(grid.best_params_)
{'svc__C': 10, 'svc__gamma': 0.0001}
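Beyond the best parameters, GridSearchCV also records the cross-validated score of that combination (this check is my addition, not in the original):
print(grid.best_score_)  # mean cross-validation accuracy of the best C/gamma combination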
Check the predictions on the test set
model = grid.best_estimator_
yfit = model.predict(Xtest)
yfit.shape
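Before plotting, you can also compute the overall test accuracy directly (my addition; it should agree with the 0.90 accuracy in the classification report further below):
from sklearn.metrics import accuracy_score
print(accuracy_score(ytest, yfit))  # fraction of test images whose name is predicted correctly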
import matplotlib as mpl
# Configure matplotlib fonts so Chinese text renders without errors
mpl.rcParams["font.sans-serif"] = ["SimHei"]
mpl.rcParams["axes.unicode_minus"] = False
# Plot the test images with their predicted names
fig, ax = plt.subplots(4, 6)
for i, axi in enumerate(ax.flat):
    # Reshape each flattened sample back to 62x47 pixels
    axi.imshow(Xtest[i].reshape(62, 47), cmap='bone')
    axi.set(xticks=[], yticks=[])
    # Label with only the last word of the target name
    axi.set_ylabel(faces.target_names[yfit[i]].split()[-1],
                   color='black' if yfit[i] == ytest[i] else 'red')
fig.suptitle('預測錯誤的名字被紅色标注', size=14);  # "Incorrectly predicted names are marked in red"
You can see that four names are predicted incorrectly, so the accuracy leaves something to be desired. Next, print the classification report.
from sklearn.metrics import classification_report
print(classification_report(ytest, yfit,
                            target_names=faces.target_names))
                   precision    recall  f1-score   support

  Donald Rumsfeld       0.75      0.87      0.81        31
    George W Bush       0.97      0.92      0.94       124
Gerhard Schroeder       0.80      0.83      0.81        29
Junichiro Koizumi       1.00      1.00      1.00        16
       Tony Blair       0.85      0.85      0.85        41

         accuracy                           0.90       241
        macro avg       0.87      0.89      0.88       241
     weighted avg       0.90      0.90      0.90       241
Finally, use seaborn's heatmap to plot the confusion matrix.
import seaborn as sns
from sklearn.metrics import confusion_matrix
# Confusion matrix: rows are true labels, columns are predicted labels
mat = confusion_matrix(ytest, yfit)
# Note the transpose here: the heatmap puts predicted labels on the rows
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=faces.target_names,
            yticklabels=faces.target_names)
plt.xlabel('true label')
plt.ylabel('predicted label');
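To connect the heatmap back to the classification report, per-class precision and recall can be read straight off the confusion matrix; the following sketch (my addition) computes them from mat, whose rows are true labels and columns are predicted labels:
import numpy as np
recall = np.diag(mat) / mat.sum(axis=1)     # per class: correct predictions / all true samples of that class
precision = np.diag(mat) / mat.sum(axis=0)  # per class: correct predictions / all samples predicted as that class
for name, p, r in zip(faces.target_names, precision, recall):
    print(f'{name}: precision={p:.2f}  recall={r:.2f}')  # should match the report above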
If you are not familiar with SVMs, you can go back and review how SVMs work first.