
Andrew Ng Machine Learning Homework --- Week 4: Backpropagation Neural Network

Problem:

Use a neural network to recognize the handwritten digits in the dataset.

The dataset contains the digit images and a set of initial theta (weight) values.

Dataset:

https://www.heywhale.com/mw/project/5da6bd34c83fb40042068a41/dataset

Steps:

Build the neural network model → randomly initialize the parameter vector → forward propagation → compute the cost function → backpropagation to obtain the partial-derivative terms → (gradient checking) → run an advanced optimization algorithm to minimize the cost and obtain the trained theta → compare the predictions against the labels to get the accuracy. The shape flow through the network is sketched below.
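For orientation, here is the shape flow the code below assumes, inferred from the weight matrices shipped with the dataset (5000 is the number of training examples):

(5000, 400) input → add bias → (5000, 401) · Theta1.T (401, 25) → sigmoid, add bias → (5000, 26) · Theta2.T (26, 10) → sigmoid → (5000, 10) output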

Python code:

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
# Step 1: load the data
data = loadmat("C:/Users/Administrator/Desktop/吳恩達機器學習資料集/week1/ex4data1.mat")
weight = loadmat("C:/Users/Administrator/Desktop/吳恩達機器學習資料集/week1/ex4weights.mat")
X = data['X'] # input X, shape (5000, 400)
y = data['y'] # labels y, shape (5000, 1)
theta1 = weight['Theta1']  # Theta1 matrix (25, 401): the hidden layer has 25 units
theta2 = weight['Theta2']  # Theta2 matrix (10, 26): the output layer has 10 units
encoder = OneHotEncoder(sparse=False)  # sparse=False returns a dense array (scikit-learn >= 1.2 uses sparse_output=False instead)
y_onehot = encoder.fit_transform(y)    # convert each label into a 10-element one-hot vector
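# To make the encoding concrete, a minimal pure-numpy sketch of what one row of
# y_onehot looks like (illustration only; the encoder above already does this).
# OneHotEncoder sorts the categories 1-10, so label k gets a 1 at index k-1:
demo_onehot = np.zeros(10)
demo_onehot[3 - 1] = 1  # the label 3 becomes [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]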

# Step 2: visualize the data
# 1. Randomly sample 100 of the 5000 examples
sample_idx = np.random.choice(np.arange(X.shape[0]),100)  # draw 100 random indices from 0-4999
sample_images = X[sample_idx,:]    # the 100 sampled rows of X
# 2. Reshape each sample into a (20, 20) matrix and display the 10x10 grid
fig,ax_array = plt.subplots(nrows=10, ncols=10, sharex=True, sharey=True, figsize=(8,8))
for i in range(10):
    for j in range(10):
        ax_array[i, j].matshow(np.array(sample_images[10*i+j].reshape((20,20))).T, cmap=matplotlib.cm.binary)
        '''matshow is matplotlib's matrix-drawing function: matplotlib.pyplot.matshow(A, fignum=None, **kwargs) draws matrix A with one matrix element per image pixel.'''
        ax_array[i, j].set_xticks([])  # hide the x-axis ticks on this subplot
        ax_array[i, j].set_yticks([])  # hide the y-axis ticks on this subplot

# Step 3: define the cost function and the feed-forward network function (returns each layer's activations and the output h(x))
# 1. Define the sigmoid (hypothesis) function
def sigmoid(z):
    return 1/(1+np.exp(-z))
# 2. Define the feed-forward network (X must be an np.matrix so that * means matrix multiplication)
def forward_propagate(X, theta1, theta2):   # forward propagation
    m = X.shape[0]
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)  # prepend the bias column to the input layer
    z2 = a1 * theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)  # prepend the bias column to the hidden layer
    z3 = a2 * theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
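# Optional sanity check (an addition, not in the original walkthrough): with the
# provided weights the shapes should be a1 (5000, 401), z2 (5000, 25),
# a2 (5000, 26), h (5000, 10). X is wrapped in np.matrix because * is matrix
# multiplication inside forward_propagate.
_a1, _z2, _a2, _z3, _h = forward_propagate(np.matrix(X), theta1, theta2)
print("forward shapes:", _a1.shape, _z2.shape, _a2.shape, _h.shape)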
# 3. Unregularized cost function
def cost(X, y):
    X = np.matrix(X)
    y = np.matrix(y)
    m = X.shape[0]  # the outer sum runs over all 5000 examples
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)  # h is the feed-forward output, shape (5000, 10)
    J = 0
    for i in range(m):
        first = np.multiply(-y[i,:], np.log(h[i,:]))
        second = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))  # y[i,:] and h[i,:] are the label vector and prediction for one example
        J += np.sum(first - second)  # J is the total cost; each iteration adds one example's cost
    J = J / m
    return J
J_primary = cost(X, y_onehot)
print("未正則化的初始代價為:", J_primary)
# 4. Regularized cost function: the regularization term is the sum of squares of theta1 and theta2, excluding the bias columns
def costReg(theta1, theta2, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    J = 0
    for i in range(m):
        first = np.multiply(y[i,:], np.log(h[i,:]))
        second = np.multiply(1-y[i,:], np.log(1-h[i,:]))
        J += np.sum(first + second)
    J = -J/m
    J += (learning_rate / (2*m)) * (np.sum(np.power(theta1[:,1:],2)) + np.sum(np.power(theta2[:,1:],2)))  # regularization term; "learning_rate" here is really the regularization strength lambda
    return J
J_cost = costReg(theta1, theta2, X, y_onehot, 1)
print("Regularized initial cost:", J_cost)

# Step 4: define the derivative of the sigmoid function and initialize the parameters
# 1. Define the sigmoid gradient
def sigmoid_gradient(z):
    return np.multiply(sigmoid(z), (1 - sigmoid(z)))
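# Quick check (an addition, not in the original): g'(0) = 0.5 * (1 - 0.5) = 0.25
print("sigmoid_gradient(0) =", sigmoid_gradient(0))  # should print 0.25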
# 2. Random initialization
input_size = 400
hidden_size = 25
num_labels = 10
learning_rate = 1
params = (np.random.random(size=hidden_size*(input_size+1)+num_labels*(hidden_size+1))-0.5)*0.24
'''Initialize every theta uniformly in (-0.12, 0.12).
params is a flat random array of all hidden_size*(input_size+1) + num_labels*(hidden_size+1) = 10285 theta values, shape (10285,).'''
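# Aside: the 0.12 bound matches the course's suggested heuristic
# epsilon_init = sqrt(6) / sqrt(L_in + L_out) for a 400-unit input layer and a
# 25-unit hidden layer (a sketch of the heuristic, not part of the original code):
epsilon_init = np.sqrt(6) / np.sqrt(input_size + hidden_size)
print("suggested epsilon_init:", epsilon_init)  # about 0.118, i.e. roughly 0.12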

# Step 5: implement backpropagation
'''
Backpropagation:
1. Split the initialized params into theta1 and theta2, then run forward propagation on X (with the one-hot y_onehot) to get every layer's values and the final output;
2. Initialize delta1 and delta2 as zero matrices with the same shapes as theta1 and theta2;
3. Compute the regularized cost from the forward-propagation output h;
4. Loop over the examples, extract each example's per-layer values, apply backpropagation to obtain delta1 and delta2, then flatten them and return grad.
'''
def backpropReg(params,input_size,hidden_size,num_labels,X,y,learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))  # should yield a (25, 401) matrix
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))  # should yield a (10, 26) matrix

    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)  # theta1 and theta2 are the current parameters
    h = np.matrix(h)  # (5000, 10)

    J = 0
    delta1 = np.zeros(theta1.shape)  # (25, 401)
    delta2 = np.zeros(theta2.shape)  # (10, 26)
    # regularized cost
    for i in range(m):
        first = np.multiply(y[i, :], np.log(h[i, :]))
        second = np.multiply((1 - y[i, :]), np.log(1 - h[i, :]))
        J += np.sum(first + second)
    J = -J / m
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2)))

    for t in range(m):  # pull out one example's input, activations, output, and error
        a1t = a1[t, :]  # (1, 401)   input layer for example t
        z2t = z2[t, :]  # (1, 25)
        a2t = a2[t, :]  # (1, 26)    hidden layer for example t
        ht = h[t, :]    # (1, 10)    output layer for example t
        yt = y[t, :]    # (1, 10)

        # compute each layer's error term
        d3t = ht - yt  # (1, 10)   output-layer error
        z2t = np.insert(z2t, 0, values=np.ones(1))  # (1, 26); z also needs the bias term for backpropagation
        d2t = np.multiply((theta2.T * d3t.T).T, sigmoid_gradient(z2t))  # hidden-layer error, (1, 26)

        # accumulate the partial derivatives of the cost
        delta1 += (d2t[:, 1:]).T * a1t  # drop the bias-unit error (first column), giving (25, 401)
        delta2 += d3t.T * a2t  # d3t is the output-layer error; no bias column to drop, so delta2 is (10, 26)
    delta1 = delta1 / m
    delta2 = delta2 / m
    delta1[:, 1:] = delta1[:, 1:] + (theta1[:, 1:] * learning_rate) / m
    delta2[:, 1:] = delta2[:, 1:] + (theta2[:, 1:] * learning_rate) / m
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))  # grad is a flat 1-D vector
    return J, grad
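# The pipeline above lists gradient checking, which the code otherwise skips.
# A minimal numerical-gradient sketch (an addition, not in the original; it
# checks only a few random entries, since each backpropReg call loops over all
# 5000 examples and perturbing all 10285 parameters would be very slow):
def gradient_check(params_vec, n_checks=5, eps=1e-4):
    _, grad = backpropReg(params_vec, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
    for idx in np.random.choice(len(params_vec), n_checks, replace=False):
        p_plus, p_minus = params_vec.copy(), params_vec.copy()
        p_plus[idx] += eps
        p_minus[idx] -= eps
        J_plus, _ = backpropReg(p_plus, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
        J_minus, _ = backpropReg(p_minus, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
        numeric = (J_plus - J_minus) / (2 * eps)  # two-sided difference quotient
        print(f"param {idx}: analytic {grad[idx]:.6f}, numeric {numeric:.6f}")
# gradient_check(params)  # uncomment to run; the two values should agree closely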

# Step 6: run an advanced optimizer to find theta, then predict
# 1. Use an advanced optimization algorithm to minimize the cost
from scipy.optimize import minimize
print("Running minimize:")  # this takes roughly two minutes
fmin = minimize(fun=backpropReg, x0=(params), args=(input_size, hidden_size, num_labels, X, y_onehot, learning_rate),
               method='TNC', jac=True, options={'maxiter': 250})
print(fmin)  # fmin.x is the returned 1-D vector of 10285 parameters; because the initialization is random, the trained theta vector is not unique
# 2. Compute the predictions
X = np.matrix(X)
thetafinal1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
thetafinal2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
a1, z2, a2, z3, h = forward_propagate(X, thetafinal1, thetafinal2)  # forward propagation with the trained weights
y_pred = np.array(np.argmax(h, axis=1) + 1)  # predicted class = index of the largest output (+1 because the labels run 1-10)
# 3. Prediction accuracy
from sklearn.metrics import classification_report
print(classification_report(y, y_pred))
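# A simple overall accuracy figure to complement the per-class report above
# (a small addition; this setup typically reaches around 99% training accuracy
# with maxiter=250, though the exact number varies with the random initialization):
accuracy = np.mean(y_pred == np.array(y))
print("training accuracy:", accuracy)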
           