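// Activation function: Sigmoid.
// The weight-update formulas follow Zhou Zhihua's book "Machine Learning".
// The network has three layers: a 3-dimensional input layer, a 4-dimensional
// hidden layer, and a 1-dimensional output layer.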
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <cmath>
using namespace std;
#define innode 3 // number of input nodes
#define outnode 1 // number of output nodes
#define trainsample 8// number of BP training samples
#define INF 99999999 // large sentinel value standing in for "infinity"
// Returns one pseudo-random starting value from {0.000, 0.001, ..., 0.999}.
// NOTE(review): the [0, 1) range with 0.001 resolution is an assumption;
// confirm the intended initialisation range.
static double randomInitialValue()
{
    return (std::rand() % 1000) / 1000.0;
}

// Initialises every connection weight and bias of the network with a
// pseudo-random value.
//
//   weight1    : n1 x n2 input-to-hidden weights (written)
//   weight2    : n2 x n3 hidden-to-output weights (written)
//   bias1      : n2 hidden-layer thresholds (written)
//   bias2      : n3 output-layer thresholds (written)
//   n1, n2, n3 : sizes of the input, hidden and output layers
void initialValue(double **weight1,double **weight2,double *bias1,double *bias2,int n1,int n2,int n3)
{
    // Seed the generator exactly once. Re-seeding with time(NULL) before every
    // rand() call restarts the same sequence (the clock only ticks once per
    // second), which would give every weight and bias the same value.
    static bool seeded = false;
    if (!seeded)
    {
        std::srand(static_cast<unsigned int>(std::time(NULL)));
        seeded = true;
    }

    for (int i = 0; i < n1; i++)
    {
        for (int j = 0; j < n2; j++)
        {
            weight1[i][j] = randomInitialValue();
        }
    }
    for (int i = 0; i < n2; i++)
    {
        for (int j = 0; j < n3; j++)
        {
            weight2[i][j] = randomInitialValue();
        }
    }
    for (int i = 0; i < n2; i++)
    {
        bias1[i] = randomInitialValue();
    }
    for (int i = 0; i < n3; i++)
    {
        bias2[i] = randomInitialValue();
    }
}
// Logistic sigmoid activation: returns 1 / (1 + e^(-x)).
double sigmoid(double x)
{
    return 1.0 / (1.0 + std::exp(-x));
}
//计算样本实际输出
void computeY(double **weight1,double **weight2,double *bias1,double *bias2,int n1,int n2,int n3,double X[innode],double predictY[outnode],double *hideY)
{
double sum=;
//计算隐层输出
for(int i=; i<n2; i++)
{
for(int j=; j<n1; j++)
{
sum += weight1[j][i]*X[j];
}
sum=sigmoid(sum-bias1[i]);
hideY[i]=sum;
}
sum=;
//计算最后一层输出
for(int i=; i<n3; i++)
{
for(int j=; j<n2; j++)
{
sum += weight2[j][i]*hideY[i];
}
sum=sigmoid(sum-bias2[i]);
predictY[i]=sum;
}
}
// Computes the gradient term of every output neuron:
//   outputDweight[j] = predictY[j] * (1 - predictY[j]) * (Y[j] - predictY[j])
//
//   n3            : number of output neurons
//   predictY      : the n3 network outputs
//   Y             : the n3 expected outputs
//   outputDweight : receives the n3 gradient terms
void computeOutputDY(int n3,double predictY[],double Y[],double *outputDweight)
{
    for (int j = 0; j < n3; j++)
    {
        outputDweight[j] = predictY[j] * (1.0 - predictY[j]) * (Y[j] - predictY[j]);
    }
}
// Computes the gradient term of every hidden neuron by propagating the output
// gradient terms back through weight2:
//   hideDweight[h] = hideY[h] * (1 - hideY[h]) * sum_j weight2[h][j] * outputDweight[j]
//
//   weight2       : n2 x n3 hidden-to-output weights
//   n2, n3        : sizes of the hidden and output layers
//   hideY         : the n2 hidden activations
//   outputDweight : the n3 output gradient terms
//   hideDweight   : receives the n2 hidden gradient terms
void computeHideDY(double **weight2,int n2,int n3,double *hideY,double *outputDweight,double *hideDweight)
{
    for (int h = 0; h < n2; h++)
    {
        double backProp = 0.0;
        for (int j = 0; j < n3; j++)
        {
            backProp += weight2[h][j] * outputDweight[j];
        }
        hideDweight[h] = hideY[h] * (1.0 - hideY[h]) * backProp;
    }
}
// Applies one gradient step to every weight and bias, in place.
//
//   weight1       : n1 x n2 input-to-hidden weights; += ratio * hideDweight[j] * X[i]
//   weight2       : n2 x n3 hidden-to-output weights; += ratio * outputDweight[j] * hideY[i]
//   bias1         : n2 hidden thresholds; -= ratio * hideDweight[i]
//   bias2         : n3 output thresholds; -= ratio * outputDweight[i]
//   n1, n2, n3    : sizes of the input, hidden and output layers
//   X             : the n1 input values of the current sample
//   hideY         : the n2 hidden activations of the current sample
//   outputDweight : the n3 output gradient terms
//   hideDweight   : the n2 hidden gradient terms
//   ratio         : learning rate
void updateWeight(double **weight1,double **weight2,double *bias1,double *bias2,int n1,int n2,int n3,double X[],double *hideY,double *outputDweight,double *hideDweight,double ratio)
{
    for (int i = 0; i < n1; i++)
    {
        for (int j = 0; j < n2; j++)
        {
            weight1[i][j] += ratio * hideDweight[j] * X[i];
        }
    }
    for (int i = 0; i < n2; i++)
    {
        for (int j = 0; j < n3; j++)
        {
            weight2[i][j] += ratio * outputDweight[j] * hideY[i];
        }
    }
    // The biases act as thresholds subtracted from the weighted sum, so they
    // move in the opposite direction to the weights.
    for (int i = 0; i < n2; i++)
    {
        bias1[i] -= ratio * hideDweight[i];
    }
    for (int i = 0; i < n3; i++)
    {
        bias2[i] -= ratio * outputDweight[i];
    }
}
// Returns the sum of squared differences between the first n entries of
// predictY and Y. No averaging or scaling is applied here; the caller is
// responsible for any further normalisation.
double computeError(double predictY[],double Y[],int n)
{
    double total = 0.0;
    for (int i = 0; i < n; i++)
    {
        const double diff = predictY[i] - Y[i];
        total += diff * diff;
    }
    return total;
}
// Trains the three-layer network on the built-in sample table with per-sample
// (online) back-propagation, then prints each prediction next to its target.
// Returns 0 on success and 1 when the layer sizes read from stdin are unusable.
int main()
{
    // Sizes of the input, hidden and output layers; the defaults are
    // overwritten by the values read from stdin.
    int n1=innode,n2=4,n3=outnode;
    std::cout<<"输入各层的维度:";
    // The sample tables below are fixed at innode inputs and outnode outputs,
    // so any other input/output size would index outside them. A failed read
    // or a non-positive hidden size is rejected as well.
    if(!(std::cin>>n1>>n2>>n3) || n1!=innode || n2<1 || n3!=outnode)
    {
        std::cerr<<"Invalid layer sizes: input must be "<<innode
                 <<", hidden must be >= 1, output must be "<<outnode<<std::endl;
        return 1;
    }

    // Input-to-hidden weights (n1 x n2) and hidden-to-output weights (n2 x n3).
    double **weight1=new double*[n1];
    for(int i=0;i<n1;i++)
        weight1[i]=new double[n2];
    double **weight2=new double*[n2];
    for(int i=0;i<n2;i++)
        weight2[i]=new double[n3];
    // Gradient terms of the output layer and of the hidden layer.
    double *outputDweight = new double[n3];
    double *hideDweight = new double[n2];
    // Thresholds of the hidden layer and of the output layer.
    double *bias1 = new double[n2];
    double *bias2 = new double[n3];

    // Training inputs.
    // NOTE(review): assumed to be the eight 3-bit binary patterns; confirm.
    double X[trainsample][innode]= {{0,0,0},{0,0,1},{0,1,0},{0,1,1},{1,0,0},{1,0,1},{1,1,0},{1,1,1}};
    // Expected outputs.
    // NOTE(review): assumed to be the pattern's binary value divided by 7;
    // confirm against the intended data set.
    double Y[trainsample][outnode]={{0},{0.1429},{0.2857},{0.4286},{0.5714},{0.7143},{0.8571},{1.0000}};

    // Network output for every training sample.
    double **predictY=new double*[trainsample];
    for(int i=0;i<trainsample;i++)
        predictY[i]=new double[outnode];
    // Hidden-layer activations of the sample currently being processed.
    double *hideY=new double[n2];

    // Random starting weights and thresholds.
    initialValue(weight1,weight2,bias1,bias2,n1,n2,n3);

    double error=INF;
    // NOTE(review): learning rate and stopping threshold are assumed values;
    // confirm.
    const double ratio=0.1;
    const double targetError=0.001;
    // Upper bound on training epochs so a run that never reaches targetError
    // still terminates.
    const long maxEpochs=1000000;
    long epoch=0;
    while(error>targetError && epoch<maxEpochs)
    {
        error=0.0;
        for(int i=0; i<trainsample; i++)
        {
            // Forward pass for sample i.
            computeY(weight1,weight2,bias1,bias2,n1,n2,n3,X[i],predictY[i],hideY);
            // Gradient terms of the output layer.
            computeOutputDY(n3,predictY[i],Y[i],outputDweight);
            // Gradient terms of the hidden layer.
            computeHideDY(weight2,n2,n3,hideY,outputDweight,hideDweight);
            // Gradient step on all weights and thresholds.
            updateWeight(weight1,weight2,bias1,bias2,n1,n2,n3,X[i],hideY,outputDweight,hideDweight,ratio);
            // Accumulate this sample's squared error.
            error += computeError(predictY[i],Y[i],outnode);
        }
        // Epoch error is half the summed squared error.
        error=0.5*error;
        ++epoch;
    }
    if(error>targetError)
    {
        std::cerr<<"Warning: stopped after "<<epoch<<" epochs with error "<<error<<std::endl;
    }

    // Print each network output next to its expected value.
    for(int i=0; i<trainsample; i++)
    {
        for(int j=0; j<outnode; j++)
        {
            std::cout<<"predictY[i][j]::"<<predictY[i][j]<<"----"<<"Y[i][j]::"<<Y[i][j]<<std::endl;
        }
    }

    // Release every heap allocation made above.
    for(int i=0;i<n1;i++)
    {
        delete [] weight1[i];
    }
    for(int i=0;i<n2;i++)
    {
        delete [] weight2[i];
    }
    for(int i=0;i<trainsample;i++)
    {
        delete [] predictY[i];
    }
    delete [] weight1;
    delete [] weight2;
    delete [] predictY;
    delete [] outputDweight;
    delete [] hideDweight;
    delete [] hideY;
    delete [] bias1;
    delete [] bias2;
    return 0;
}