
Semantic Segmentation Study Notes (3): Parsing the SegNet Upsample Layer

1 Parameter Settings

message UpsampleParameter {
  // DEPRECATED. No need to specify upsampling scale factors when
  // exact output shape is given by upsample_h, upsample_w parameters.
  optional uint32 scale = 1 [default = 2];
  // DEPRECATED. No need to specify upsampling scale factors when
  // exact output shape is given by upsample_h, upsample_w parameters.
  optional uint32 scale_h = 2;
  // DEPRECATED. No need to specify upsampling scale factors when
  // exact output shape is given by upsample_h, upsample_w parameters.
  optional uint32 scale_w = 3;
  // DEPRECATED. Specify exact output height using upsample_h. This
  // parameter only works when scale is 2
  optional bool pad_out_h = 4 [default = false];
  // DEPRECATED. Specify exact output width using upsample_w. This
  // parameter only works when scale is 2
  optional bool pad_out_w = 5 [default = false];
  optional uint32 upsample_h = 6;
  optional uint32 upsample_w = 7;
}

The configurable parameters are (a usage sketch follows the list):

    scale

    scale_h, scale_w

    pad_out_h, pad_out_w

    upsample_h, upsample_w
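
As a usage sketch, the same settings can be written either in a prototxt upsample_param block or, as below, through the generated protobuf C++ API. This assumes the SegNet fork of Caffe (whose LayerParameter carries an upsample_param field); the layer and blob names are illustrative:

#include "caffe/proto/caffe.pb.h"

// Sketch: filling in the parameters above through the generated protobuf
// API, equivalent to an upsample_param { ... } block in a prototxt.
// Assumes the SegNet fork of Caffe; names are illustrative only.
caffe::LayerParameter MakeUpsampleLayer() {
  caffe::LayerParameter layer;
  layer.set_name("upsample5");
  layer.set_type("Upsample");
  layer.add_bottom("conv5_3_D");   // features to upsample
  layer.add_bottom("pool5_mask");  // pooling mask from the encoder
  layer.add_top("pool5_D");
  // Preferred form: give the exact output shape (here, pool4's 30x23
  // output for a 480x360 input) instead of the deprecated scale params.
  layer.mutable_upsample_param()->set_upsample_w(30);
  layer.mutable_upsample_param()->set_upsample_h(23);
  return layer;
}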

2 Computing the top feature map size

(1) First check whether upsample_h and upsample_w are specified. If so, the output takes exactly that size; otherwise go to (2).

(2) Check whether scale_h is specified. If not, scale_h_ = scale_w_ = scale; otherwise go to (3).

(3) scale_h_ = scale_h, scale_w_ = scale_w.

pad_out_h and pad_out_w may only be specified when scale_h_ == scale_w_ == 2; otherwise it is an error. In cases (2) and (3), the top feature map size is:

       upsample_h_ = bottom[0]->height() * scale_h_ - int(pad_out_h_)

       upsample_w_ = bottom[0]->width() * scale_w_ - int(pad_out_w_)
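
Putting steps (1)-(3) together, the shape logic can be summarized as a small standalone helper. This is only a sketch of the decision order used by LayerSetUp/Reshape; the function and its signature are hypothetical:

#include <cassert>

// Hypothetical helper mirroring steps (1)-(3) above; values <= 0 stand
// for "parameter not set".
struct UpsampleShape { int h, w; };

UpsampleShape ComputeTopShape(int bottom_h, int bottom_w,
                              int upsample_h, int upsample_w,
                              int scale, int scale_h, int scale_w,
                              bool pad_out_h, bool pad_out_w) {
  // (1) An exact output shape wins over everything else.
  if (upsample_h > 0 && upsample_w > 0) {
    return {upsample_h, upsample_w};
  }
  // (2) No scale_h: the single scale drives both axes;
  // (3) otherwise the per-axis scale_h / scale_w values are used.
  const int sh = (scale_h > 0) ? scale_h : scale;
  const int sw = (scale_w > 0) ? scale_w : scale;
  // pad_out_* merely trims one row/column, which is only well-defined
  // for a scale of exactly 2.
  assert(!pad_out_h || sh == 2);
  assert(!pad_out_w || sw == 2);
  return {bottom_h * sh - int(pad_out_h), bottom_w * sw - int(pad_out_w)};
}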

Notes:

(1) If the height and width of the input image are not multiples of 32, you must specify upsample_h and upsample_w, otherwise you will hit a dimension-mismatch error. The reason is that upsampling relies on the position information recorded by the corresponding pool layer in the encoder. For example, if a feature map is 45 before pooling and 23 after, upsampling 23 directly by 2 gives 46, while the position map produced by the pool layer corresponds to a size of 45, so the sizes disagree at upsample time.
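
The 45 → 23 → 46 mismatch can be verified with a few lines of C++ using Caffe's ceil-mode pooling output formula (kernel 2, stride 2, no padding):

#include <cmath>
#include <cstdio>

int main() {
  // Caffe's pooling layer computes its output size in ceil mode:
  // pooled = ceil((in - kernel) / stride) + 1, here with kernel 2, stride 2.
  const int before_pool = 45;
  const int after_pool =
      static_cast<int>(std::ceil((before_pool - 2) / 2.0)) + 1;  // 23
  const int naive_upsample = after_pool * 2;                     // 46
  std::printf("pool: %d -> %d, naive 2x upsample: %d != %d\n",
              before_pool, after_pool, naive_upsample, before_pool);
  // Remedy: pass upsample_h/upsample_w = 45 explicitly (or, with scale 2,
  // set pad_out_h/pad_out_w to trim the extra row/column).
  return 0;
}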

(2) When specifying upsample_h and upsample_w, set them according to the size of the corresponding pool layer's feature map in the encoder. For example, in the sample prototxt the input image is 480×360; tracing the 360 side: 360 → pool1 (180) → pool2 (90) → pool3 (45) → pool4 (23) → pool5 (12). upsample5 relies on pool4's position information and must match pool4's size, so upsample_h = 23.
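
The same ceil-mode formula reproduces the whole 360-side chain, and with it the exact values the decoder's upsample layers must target; a small sketch:

#include <cmath>
#include <cstdio>

// Derive each pooling output along the 360 side, then read off the sizes
// the decoder must target: upsampleN restores the output of pool(N-1).
int main() {
  int h = 360;
  int pooled[5];
  for (int level = 0; level < 5; ++level) {
    h = static_cast<int>(std::ceil((h - 2) / 2.0)) + 1;
    pooled[level] = h;  // pool1..pool5: 180, 90, 45, 23, 12
  }
  std::printf("upsample5 -> upsample_h = %d\n", pooled[3]);  // 23
  std::printf("upsample4 -> upsample_h = %d\n", pooled[2]);  // 45
  return 0;
}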

3 Source Code

#include <algorithm>
#include <cfloat>
#include <vector>
#include <iostream>

#include "caffe/layers/upsample_layer.hpp"

namespace caffe {

template <typename Dtype>
void UpsampleLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  UpsampleParameter upsample_param = this->layer_param_.upsample_param();
  CHECK((upsample_param.has_upsample_h() && upsample_param.has_upsample_w())
      || (!upsample_param.has_scale() && upsample_param.has_scale_h()
      && upsample_param.has_scale_w())
      || (!upsample_param.has_scale_h() && !upsample_param.has_scale_w()))
      << "upsample_h & upsample_w are required, else (DEPRECATED) "
      << "scale OR scale_h & scale_w are required.";

  if (upsample_param.has_upsample_h() && upsample_param.has_upsample_w()) {
    upsample_h_ = upsample_param.upsample_h(); // exact output size given by upsample_h / upsample_w
    upsample_w_ = upsample_param.upsample_w();
    CHECK_GT(upsample_h_, 1);
    CHECK_GT(upsample_w_, 1);
  } else {
    LOG(INFO) << "Params 'pad_out_{}_' are deprecated. Please declare upsample"
        << " height and width useing the upsample_h, upsample_w parameters.";
    if (!upsample_param.has_scale_h()) { // no scale_h given: the single scale drives both axes
      scale_h_ = scale_w_ = upsample_param.scale();
      CHECK_GT(scale_h_, 1);
    } else {
      scale_h_ = upsample_param.scale_h();
      scale_w_ = upsample_param.scale_w();
      CHECK_GT(scale_h_, 1);
      CHECK_GT(scale_w_, 1);
    }
    pad_out_h_ = upsample_param.pad_out_h();
    pad_out_w_ = upsample_param.pad_out_w();
    CHECK(!pad_out_h_ || scale_h_ == 2)  // pad_out_* may only be set when the corresponding scale is 2
        << "Output height padding compensation requires scale_h == 2, otherwise "
        << "the output size is ill-defined.";
    CHECK(!pad_out_w_ || scale_w_ == 2) 
        << "Output width padding compensation requires scale_w == 2, otherwise "
        << "the output size is ill-defined.";
    upsample_h_ = upsample_w_ = -1;  // flag to calculate in Reshape
  }
}

template <typename Dtype>
void UpsampleLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  CHECK_EQ(4, bottom[1]->num_axes()) << "Input mask must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());
  CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
  CHECK_EQ(bottom[0]->height(), bottom[1]->height());
  CHECK_EQ(bottom[0]->width(), bottom[1]->width());

  if (upsample_h_ <= 0 || upsample_w_ <= 0) {
    upsample_h_ = bottom[0]->height() * scale_h_ - int(pad_out_h_); // upsample_h_ = height*scale-pad_out 
    upsample_w_ = bottom[0]->width() * scale_w_ - int(pad_out_w_);
  }
  top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), upsample_h_,
      upsample_w_);
  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
}

template <typename Dtype>
void UpsampleLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* bottom_mask_data = bottom[1]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  // Initialize
  const int top_count = top[0]->count();
  caffe_set(top_count, Dtype(0), top_data);
  // The main loop
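  // Scatter: each bottom value is copied to the position its pooling mask
  // entry records within the current channel's upsample_h_ x upsample_w_ plane.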
  for (int n = 0; n < bottom[0]->num(); ++n) {
    for (int c = 0; c < channels_; ++c) {
      for (int i = 0; i < height_ * width_; ++i) {
        const int idx = static_cast<int>(bottom_mask_data[i]);
        if (idx >= upsample_h_ * upsample_w_) {
          // this can happen if the pooling layer that created the input mask
          // had an input with different size to top[0]
          LOG(FATAL) << "upsample top index " << idx << " out of range - "
            << "check scale settings match input pooling layer's "
            << "downsample setup";
        }
        top_data[idx] = bottom_data[i];
      }
      // compute offset
      bottom_data += bottom[0]->offset(0, 1);
      bottom_mask_data += bottom[1]->offset(0, 1);
      top_data += top[0]->offset(0, 1);
    }
  }
}

template <typename Dtype>
void UpsampleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_mask_data = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

    const int bottom_count = bottom[0]->count();
    caffe_set(bottom_count, Dtype(0), bottom_diff);
    // The main loop
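    // Gather: each bottom element picks up the gradient from the top
    // position its value was scattered to in the forward pass.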
    for (int n = 0; n < bottom[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int i = 0; i < height_ * width_; ++i) {
          const int idx = static_cast<int>(bottom_mask_data[i]);
          if (idx >= height_ * width_ * scale_h_ * scale_w_) {
            // this can happen if the pooling layer that created
            // the input mask had an input with different size to top[0]
            LOG(FATAL) << "upsample top index " << idx << " out of range - "
              << "check scale settings match input pooling layer's downsample setup";
          }
          bottom_diff[i] = top_diff[idx];
        }
        // compute offset
        bottom_diff += bottom[0]->offset(0, 1);
        bottom_mask_data += bottom[1]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
      }
    }
  }
}


#ifdef CPU_ONLY
STUB_GPU(UpsampleLayer);
#endif

INSTANTIATE_CLASS(UpsampleLayer);
REGISTER_LAYER_CLASS(Upsample);

}  // namespace caffe
