天天看点

吴恩达机器学习课后习题---week4反向传播神经网络题目:数据集:步骤:python代码:

题目:

使用神经网络算法识别数据集中的手写数字,

数据集包含有:数字集与初始theta值。

数据集:

https://www.heywhale.com/mw/project/5da6bd34c83fb40042068a41/dataset

步骤:

构建神经网络模型——初始化向量——向前传播算法——计算代价函数——反向传播,计算偏导数项——(梯度检验)——高级优化算法下降梯度得到预测值theta——对比预测数据得出准确率。

python代码:

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
# Step 1: load the digit data set and the pre-trained network weights
data = loadmat("C:/Users/Administrator/Desktop/吴恩达机器学习数据集/week1/ex4data1.mat")
weight = loadmat("C:/Users/Administrator/Desktop/吴恩达机器学习数据集/week1/ex4weights.mat")
X = data['X']  # inputs, (5000, 400): one flattened 20x20 image per row
y = data['y']  # labels, (5000, 1)
theta1 = weight['Theta1']  # (25, 401): input -> hidden layer (25 units)
theta2 = weight['Theta2']  # (10, 26): hidden -> output layer (10 units)
# One-hot encode y into (5000, 10) dense vectors. scikit-learn renamed the
# dense-output switch from `sparse` to `sparse_output` in 1.2 and removed
# `sparse` in 1.4, so try the new keyword first and fall back for old versions.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:  # scikit-learn < 1.2 only knows `sparse`
    encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y)  # (5000, 10) one-hot label matrix

# Step 2: visualize a random sample of the training digits
# 1. Draw 100 random rows out of the 5000 examples
sample_idx = np.random.choice(np.arange(X.shape[0]), 100)  # 100 indices in [0, 4999]
sample_images = X[sample_idx, :]  # (100, 400): the sampled pixel rows
# 2. Reshape each row to 20x20 and show it on a 10x10 grid
fig, ax_array = plt.subplots(nrows=10, ncols=10, sharex=True, sharey=True, figsize=(8, 8))
for i in range(10):
    for j in range(10):
        # matshow draws a matrix as an image, one matrix element per pixel.
        # The transpose matches the exercise's image orientation —
        # presumably the pixels are stored column-major; confirm visually.
        ax = ax_array[i, j]
        ax.matshow(np.array(sample_images[10 * i + j].reshape((20, 20))).T, cmap=matplotlib.cm.binary)
        # Fix: remove the ticks on THIS subplot. The original called
        # plt.xticks()/plt.yticks(), which act on pyplot's "current" axes,
        # not on the axes that was just drawn on.
        ax.set_xticks([])
        ax.set_yticks([])

#第三步 定义代价函数 和 前馈神经网络函数(返回各层激活值和输出H(x))
#1、定义h函数
def sigmoid(z):
    """Element-wise logistic sigmoid: g(z) = 1 / (1 + e^(-z))."""
    exp_neg = np.exp(-z)
    return 1.0 / (1.0 + exp_neg)
#2、定义前馈神经网络
def forward_propagate(X, theta1, theta2):
    """Run one forward pass through the 3-layer network.

    Parameters
    ----------
    X : (m, n) inputs, without the bias column.
    theta1 : (hidden, n+1) weights, input layer -> hidden layer.
    theta2 : (k, hidden+1) weights, hidden layer -> output layer.

    Returns
    -------
    a1, z2, a2, z3, h : per-layer activations and pre-activations;
    h is the (m, k) network output.
    """
    m = X.shape[0]
    # prepend a bias column of ones to the inputs
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    # Use `@` (matrix multiplication) instead of `*`: on np.matrix operands
    # both mean matmul, but on plain ndarrays `*` is element-wise and would
    # fail or silently compute the wrong thing. `@` is correct for both.
    z2 = a1 @ theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)  # bias for hidden layer
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
#3、未正则化的初始代价函数
def cost(X, y):
    """Unregularized cross-entropy cost of the network on (X, y).

    Relies on the module-level pre-loaded weights `theta1` and `theta2`.

    X : (m, 400) inputs; y : (m, 10) one-hot labels.
    Returns the scalar mean cost over the m examples.
    """
    X = np.matrix(X)
    y = np.matrix(y)
    m = X.shape[0]
    # h: (m, 10) network outputs from the forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    # Vectorized cross-entropy over all examples and labels at once.
    # (Replaces the original per-row Python loop, whose inline comment had
    # a stray duplicated `J += ...` statement fused into it; same result.)
    first = np.multiply(-y, np.log(h))
    second = np.multiply(1 - y, np.log(1 - h))
    return np.sum(first - second) / m
# Cost of the pre-loaded weights without regularization
J_primary = cost(X, y_onehot)
print("未正则化的初始代价为:", J_primary)
#4、计算正则化后的初始代价函数  正则化项,theta1的平方和+theta2的平方和为主项。
def costReg(theta1, theta2, X, y, learnintrate):
    """Regularized cross-entropy cost of the network.

    theta1, theta2 : layer weight matrices.
    X, y : inputs and one-hot labels.
    learnintrate : regularization strength (lambda).
    Returns the scalar regularized cost.
    """
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    _a1, _z2, _a2, _z3, h = forward_propagate(X, theta1, theta2)
    # accumulate the log-likelihood one example at a time
    total = 0
    for row in range(m):
        pos = np.multiply(y[row, :], np.log(h[row, :]))
        neg = np.multiply(1 - y[row, :], np.log(1 - h[row, :]))
        total += np.sum(pos + neg)
    J = -total / m
    # penalty: squared weights of both layers, excluding each bias column
    penalty = np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))
    J += (learnintrate / (2 * m)) * penalty
    return J
# Cost of the pre-loaded weights with regularization (lambda = 1)
J_cost = costReg(theta1,theta2,X,y_onehot,1)
print("正则化的初始代价和为:", J_cost)

#第四步 定义sigmoid函数的导数函数,并初始化各个属性值
#1、定义sigmoid梯度导数函数
def sigmoid_gradient(z):
    """Element-wise derivative of the sigmoid: g'(z) = g(z) * (1 - g(z))."""
    g = sigmoid(z)
    return np.multiply(g, 1 - g)
#2、随机初始化
# Network dimensions and random weight initialization
input_size = 400   # 20x20 pixel inputs
hidden_size = 25   # hidden layer units
num_labels = 10    # output classes
learning_rate = 1  # regularization strength lambda
# One flat vector holding both weight matrices:
# 25*401 + 10*26 = 10285 entries, each drawn uniformly from (-0.12, 0.12).
n_params = hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)
params = (np.random.random(size=n_params) - 0.5) * 0.24

#第五步 实现反向传播算法
'''
实现反向传播算法。
1、使用数据集X,向量化的y_onehot,初始化好的params分解成为theta1与theta2,使用前馈神经网络得出其中各层的数值与最后的结果;
2、初始化delta1,delta2与theta1,theta2形式相同并初始值为0;
3、根据前馈神经网络结果h计算正则化代价;
4、使用for循环分解出每个数据的各层神经网络的值,反向传播算法得出delta1和delta2的值,并使其一维化返回grad
'''
def backpropReg(params,input_size,hidden_size,num_labels,X,y,learning_rate):
    """Regularized cost and gradient of the 3-layer network.

    params : flat vector holding both weight matrices (10285 entries here).
    input_size, hidden_size, num_labels : layer sizes (400 / 25 / 10 here).
    X, y : training inputs and one-hot labels.
    learning_rate : regularization strength (lambda in the course notes).
    Returns (J, grad) — scalar cost plus flat gradient vector — the shape
    scipy.optimize.minimize expects with jac=True.
    """
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))  # (25, 401)
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))  # (10, 26)

    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)  # forward pass with the unpacked thetas
    h = np.matrix(h)  # (5000, 10) network outputs

    J = 0
    delta1 = np.zeros(theta1.shape)  # (25, 401) gradient accumulator, layer 1
    delta2 = np.zeros(theta2.shape)  # (10, 26) gradient accumulator, layer 2
    # Regularized cross-entropy cost (same formula as costReg)
    for i in range(m):
        first = np.multiply(y[i, :], np.log(h[i, :]))
        second = np.multiply((1 - y[i, :]), np.log(1 - h[i, :]))
        J += np.sum(first + second)
    J = -J / m
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2)))

    for t in range(m):  # back-propagate one training example at a time
        a1t = a1[t, :]  # (1, 401) input layer activation (with bias)
        z2t = z2[t, :]  # (1, 25)  hidden layer pre-activation
        a2t = a2[t, :]  # (1, 26)  hidden layer activation (with bias)
        ht = h[t, :]  # (1, 10)  network output for example t
        yt = y[t, :]  # (1, 10)  one-hot target for example t

        # error terms of each layer
        d3t = ht - yt  # (1, 10) output-layer error
        z2t = np.insert(z2t, 0, values=np.ones(1))  # (1, 26): add bias so shapes line up with theta2
        d2t = np.multiply((theta2.T * d3t.T).T, sigmoid_gradient(z2t))  # (1, 26) hidden-layer error

        # accumulate the partial derivatives
        delta1 += (d2t[:, 1:]).T * a1t  # drop the bias error term -> (25, 401)
        delta2 += d3t.T * a2t  # output error has no bias term to drop -> (10, 26)
    delta1 = delta1 / m
    delta2 = delta2 / m
    # regularize every weight except each layer's bias column
    delta1[:, 1:] = delta1[:, 1:] + (theta1[:, 1:] * learning_rate) / m
    delta2[:, 1:] = delta2[:, 1:] + (theta2[:, 1:] * learning_rate) / m
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))  # flatten both gradients into one vector
    return J, grad

# Step 6: minimize the cost with an advanced optimizer, then predict
# 1. Truncated-Newton (TNC) minimization of backpropReg; jac=True because
#    backpropReg returns (cost, gradient) as one tuple.
from scipy.optimize import minimize
print("计算fmin:") # takes roughly two minutes to run
fmin = minimize(fun=backpropReg, x0=(params), args=(input_size, hidden_size, num_labels, X, y_onehot, learning_rate),
               method='TNC', jac=True, options={'maxiter': 250})
print(fmin)  # fmin.x is the optimized flat 10285-entry parameter vector;
             # the random init means the learned thetas are not unique
# 2. Forward-propagate with the learned weights to get predictions
X = np.matrix(X)
thetafinal1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
thetafinal2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
a1,z2,a2,z3,h = forward_propagate(X,thetafinal1,thetafinal2)  # forward pass with trained weights
y_pred = np.array(np.argmax(h,axis=1)+1)  # +1: labels appear to be 1-based (10 presumably encodes digit 0 — confirm)
# 3. Per-class precision/recall/F1 of the predictions against the true labels
from sklearn.metrics import classification_report
print(classification_report(y,y_pred))
           
吴恩达机器学习课后习题---week4反向传播神经网络题目:数据集:步骤:python代码: