caffe之SoftmaxWithLoss層自定義實作

caffe中的各層實作，因為封裝了各種函數和為了擴充，在提升了效率的同時，降低了一定的代碼可讀性，這裡，為了更好地了解softmax以及caffe中前向傳播和反向傳播的原理，我用通俗易懂的代碼實作了SoftmaxWithLoss層（以下簡稱loss層），進行前向傳播和反向傳播，得到的訓練結果和内置的代碼結果是一樣的。

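這裡定義batch_size為網絡輸入的批大小，label_num表示標籤的類別數。loss層的輸入blob有兩個：一個來自全連接層，維度是batch_size*label_num；另一個是標籤層，維度是batch_size*1（每個樣本一個標籤）。為了通俗易懂，我們舉個例子：比如mnist問題的LeNet網絡，是一個10類的分類問題（數字0~9），訓練時每個batch大小為64，所以這裡batch_size=64，label_num=10。Softmax層的原理，以及根據loss反向傳播時的梯度推導，因為這裡寫公式不方便，我就在word裡寫了，如下圖。要點如下：置信度 $p_{ij}=\dfrac{e^{z_{ij}-m_i}}{\sum_{k} e^{z_{ik}-m_i}}$（其中 $m_i=\max_k z_{ik}$，先減最大值以避免數值溢出），損失 $L=-\dfrac{1}{N}\sum_{i}\log p_{i,y_i}$，梯度 $\dfrac{\partial L}{\partial z_{ij}}=\dfrac{1}{N}\left(p_{ij}-\mathbb{1}[j=y_i]\right)$，這裡 $N$ 為batch_size，$z_{ij}$ 為全連接層對樣本 $i$ 第 $j$ 類的輸出，$y_i$ 為樣本 $i$ 的真實標籤。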

然後，貼代碼吧：

頭檔案：

#ifndef CAFFE_MY_LOSS_LAYER_HPP_
#define CAFFE_MY_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"
#include "caffe/layers/softmax_layer.hpp"

namespace caffe {

/**
 * @brief Hand-written softmax + multinomial logistic loss layer.
 *
 * Takes two bottom blobs: bottom[0] holds the per-class scores and bottom[1]
 * holds one integer class label per sample. Produces a single scalar loss in
 * top[0].
 *
 * The softmax probabilities computed by Forward_cpu are cached in prob_ and
 * reused by Backward_cpu to form the gradient with respect to bottom[0].
 */
template <typename Dtype>
class MyLossLayer : public LossLayer<Dtype> {
 public:
  // The size members are zero-initialised so they hold a defined value before
  // the first Reshape() call assigns the real dimensions.
  explicit MyLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), label_num(0), batch_size(0) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "MyLoss"; }
  // Only top[0] (the scalar loss) is ever written by this layer, so the
  // minimum, maximum and exact top counts are all 1. The previous
  // MaxTopBlobs() == 2 contradicted ExactNumTopBlobs() == 1.
  virtual inline int ExactNumTopBlobs() const { return 1; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  vector<vector<Dtype> > prob_;   // softmax probabilities, batch_size x label_num
  int label_num;    // number of classes (bottom[0]->channels())
  int batch_size;   // number of samples per batch (bottom[0]->num())

};

}  // namespace caffe

#endif  // CAFFE_MY_LOSS_LAYER_HPP_

源檔案，反向傳播時，按照公式更新梯度就好了

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <vector>

#include "caffe/layers/my_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
using namespace std;
namespace caffe {

// One-time layer setup. This layer has no configuration of its own, so the
// call is forwarded unchanged to the LossLayer base class.
template <typename Dtype>
void MyLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                                    const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
}

/**
 * @brief Records the batch/class dimensions and sizes the probability cache.
 *
 * After the base-class reshape, label_num is taken from bottom[0]->channels()
 * (e.g. 10 for MNIST) and batch_size from bottom[0]->num() (e.g. 64).
 *
 * @param bottom bottom[0] = class scores, bottom[1] = labels.
 * @param top    top[0] = scalar loss (shaped by the base class).
 */
template <typename Dtype>
void MyLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  label_num = bottom[0]->channels();
  batch_size = bottom[0]->num();

  // Forward/Backward only visit element (i, j, 0, 0) of bottom[0]. If the blob
  // had extra spatial positions they would be ignored in the loss and left
  // with uninitialised gradients, so reject such input up front.
  CHECK_EQ(bottom[0]->count(), batch_size * label_num)
      << "MyLoss expects bottom[0] to be N x C (x 1 x 1).";

  // Only reallocate when the shape actually changed; Forward_cpu overwrites
  // every entry, so re-zeroing on each call is unnecessary.
  const bool rows_changed = static_cast<int>(prob_.size()) != batch_size;
  const bool cols_changed =
      !prob_.empty() && static_cast<int>(prob_[0].size()) != label_num;
  if (rows_changed || cols_changed) {
    prob_.assign(batch_size, vector<Dtype>(label_num, Dtype(0)));
  }
}

/**
 * @brief Computes the mean softmax cross-entropy loss over the batch.
 *
 * For every sample the class scores in bottom[0] are turned into softmax
 * probabilities (stored in prob_ for Backward_cpu), and the negative log of
 * the probability of the true class (from bottom[1]) is accumulated.
 *
 * @param bottom bottom[0] = class scores, bottom[1] = one label per sample.
 * @param top    top[0] receives the scalar loss averaged over batch_size.
 */
template <typename Dtype>
void MyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  CHECK_GT(label_num, 0) << "MyLoss needs at least one class score per sample.";

  const Dtype* scores = bottom[0]->cpu_data();
  const Dtype* label = bottom[1]->cpu_data();   // one label per sample

  Dtype loss = 0;
  for (int i = 0; i < batch_size; ++i) {
    vector<Dtype>& row = prob_[i];

    // Subtract the row maximum before exponentiating for numerical stability.
    // The maximum is seeded from the first score rather than a magic constant,
    // so rows whose scores are all very negative are still handled correctly.
    Dtype row_max = scores[bottom[0]->offset(i, 0)];
    for (int j = 0; j < label_num; ++j) {
      row[j] = scores[bottom[0]->offset(i, j)];
      row_max = std::max(row_max, row[j]);
    }

    // Exponentiate once, accumulating the softmax denominator as we go.
    Dtype denom = 0;
    for (int j = 0; j < label_num; ++j) {
      row[j] = std::exp(row[j] - row_max);
      denom += row[j];
    }
    for (int j = 0; j < label_num; ++j) {
      row[j] /= denom;
    }

    // The label indexes into the probability row, so it must be a valid class.
    const int real_label = static_cast<int>(label[i]);
    CHECK_GE(real_label, 0) << "Negative label for sample " << i;
    CHECK_LT(real_label, label_num) << "Label out of range for sample " << i;

    // Clamp to FLT_MIN so log() never sees zero.
    loss -= std::log(std::max(row[real_label], Dtype(FLT_MIN)));
  }

  // Average over the batch; the max() guards against a 0/0 on an empty batch.
  top[0]->mutable_cpu_data()[0] = loss / std::max(batch_size, 1);
}

/**
 * @brief Back-propagates the loss to the class scores in bottom[0].
 *
 * Using the probabilities cached by Forward_cpu, the gradient for sample i
 * and class j is (prob_[i][j] - [j == label_i]) scaled by
 * top[0]->cpu_diff()[0] / batch_size. Reading the top diff makes the layer
 * honour a loss weight other than 1.
 *
 * @param top            top[0]'s diff holds the scale applied to this loss.
 * @param propagate_down propagate_down[0] enables the gradient for bottom[0];
 *                       propagate_down[1] (labels) is not supported.
 * @param bottom         bottom[0]'s diff is written; bottom[1] supplies labels.
 */
template <typename Dtype>
void MyLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Labels are discrete targets; there is no gradient to send to them.
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) {
    return;
  }

  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const Dtype* label = bottom[1]->cpu_data();

  // Fold the loss weight and the batch normalisation into a single factor so
  // the gradient is written in one pass.
  const Dtype scale = top[0]->cpu_diff()[0] / std::max(batch_size, 1);

  for (int i = 0; i < batch_size; ++i) {
    const int real_label = static_cast<int>(label[i]);   // true class of sample i
    for (int j = 0; j < label_num; ++j) {
      // d(loss)/d(score_j) = p_j - 1 for the true class, p_j otherwise.
      const Dtype indicator = (j == real_label) ? Dtype(1) : Dtype(0);
      bottom_diff[bottom[0]->offset(i, j)] = (prob_[i][j] - indicator) * scale;
    }
  }
}


// Instantiate the layer template and register it under the name "MyLoss",
// which is the `type` string used for this layer in the net prototxt.
// NOTE(review): both macros are defined in Caffe headers outside this file;
// presumably INSTANTIATE_CLASS emits the float/double instantiations and
// REGISTER_LAYER_CLASS(MyLoss) maps "MyLoss" to MyLossLayer -- confirm there.
INSTANTIATE_CLASS(MyLossLayer);
REGISTER_LAYER_CLASS(MyLoss);

}  // namespace caffe

編譯好後，用mnist的資料跑一下試試：

# Loss layer using the custom MyLoss implementation.
# The first bottom supplies the class scores, the second the ground-truth
# labels; the single top receives the scalar loss.
layer {
    name: "my_loss"
    type: "MyLoss"
    bottom: "ip2"
    bottom: "label"
    top: "my_loss"
}

最後結果：

caffe之SoftmaxWithLoss層自定義實作

caffe之SoftmaxWithLoss層自定義實作

繼續閱讀

C語言第四章自述2第四章選擇結構程式設計

面試題:vector和map的差別，異同。空間分布，100萬資料存哪個比較合适。一、疊代器差別二、vector三、Map、Set四、vector_map 為什麼比map效率高五、如何選擇六、容器選擇原則七、效率對比

C++ 多線程用條件變量确定線程的執行順序而不是使用 sleep(1)

POJ 1284 Primitive Roots (歐拉函數&原根定理)

CQ V1.0分詞bates(基于雙數組tire樹)—應該是目前最快的中文分詞算法

成員函數初始化清單

2021-08-13c++——類之操作符重載

swmm與lisflood-fp源碼如何一起編譯 CMake指令

Windows下VS開發環境環境安裝工程項目設定關于Debug和Release的提示

一文看懂字元串的加減乘除

C++ 第十五周報告1--《冒泡法排序》

C++實作簡單順序表

C經典書籍筆記——C陷阱與缺陷②(文法陷阱之優先級)一、錯誤案列二、優先級規律

線性表之順序表的實作

C++判斷素數、求最大公約數代碼判斷一個數是否為素數求兩個數的最大公約數

SequoiaDB巨杉資料庫C++驅動概述

caffe之SoftmaxWithLoss層 自定義實作

繼續閱讀

caffe之SoftmaxWithLoss層自定義實作