While reproducing Residual Attention Network for Image Classification (CVPR 2017), I found that the Interp layer, which the paper uses for upsampling, is not defined in stock Caffe, so I pieced together my own Interp layer from material found online.
Step 1: define the custom Interp layer.
1.1 Create a file named interp_layer.hpp under caffe/include/caffe/layers with the following contents:
#ifndef CAFFE_INTERP_LAYER_HPP_
#define CAFFE_INTERP_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
/**
* @brief Changes the spatial resolution by bi-linear interpolation.
* The target size is specified in terms of pixels.
* The start and end pixels of the input are mapped to the start
* and end pixels of the output.
*/
template <typename Dtype>
class InterpLayer : public Layer<Dtype> {
public:
explicit InterpLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "Interp"; }
virtual inline int ExactNumBottomBlobs() const { return 1; } // the number of bottom blobs the Interp layer accepts; default is 1, adjust as needed (likewise for top blobs below)
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
int num_, channels_;
int height_in_, width_in_;
int height_out_, width_out_;
int pad_beg_, pad_end_;
int height_in_eff_, width_in_eff_;
};
} // namespace caffe
#endif  // CAFFE_INTERP_LAYER_HPP_
1.2 Create interp.hpp in the caffe/include/caffe/util folder with the following code:
#ifndef CAFFE_UTIL_INTERP_H_
#define CAFFE_UTIL_INTERP_H_
#include <cublas_v2.h>
#include "caffe/proto/caffe.pb.h"
namespace caffe {
// Bi-linear interpolation
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
template <typename Dtype, bool packed>
void caffe_cpu_interp2(const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);
template <typename Dtype, bool packed>
void caffe_gpu_interp2(const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);
// Backward (adjoint) operation
template <typename Dtype, bool packed>
void caffe_cpu_interp2_backward(const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);
template <typename Dtype, bool packed>
void caffe_gpu_interp2_backward(const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);
// Create Gaussian pyramid of an image. Assume output space is pre-allocated.
// IN : [channels height width]
template <typename Dtype, bool packed>
void caffe_cpu_pyramid2(const int channels,
const Dtype *data, const int height, const int width,
Dtype *data_pyr, const int levels);
template <typename Dtype, bool packed>
void caffe_gpu_pyramid2(const int channels,
const Dtype *data, const int height, const int width,
Dtype *data_pyr, const int levels);
/*
template <typename Dtype, bool packed>
void caffe_cpu_mosaic(const int channels,
const Dtype *data1, const MosaicParameter mosaic_params1,
const Dtype *data_pyr, const int levels,
Dtype *data2, const MosaicParameter mosaic_params2);
template <typename Dtype, bool packed>
void caffe_gpu_mosaic(const int channels,
const Dtype *data1, const MosaicParameter mosaic_params1,
const Dtype *data_pyr, const int levels,
Dtype *data2, const MosaicParameter mosaic_params2);
*/
} // namespace caffe
#endif  // CAFFE_UTIL_INTERP_H_
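A note on the packed template flag: packed = true indexes the data as interleaved HWC (all channels of a pixel stored adjacently), while packed = false uses Caffe's usual planar CHW blob layout, striding by Height * Width between channels. The Interp layer itself only ever instantiates the packed = false variants.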
1.3 Create common.cuh in the caffe/include/caffe folder. It provides a double-precision atomicAdd fallback for GPUs with compute capability below 6.0, which the backward kernel in interp.cu needs. Code as follows:
#ifndef CAFFE_COMMON_CUH_
#define CAFFE_COMMON_CUH_
#include <cuda.h>
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 // this guard is required: CUDA already defines atomicAdd(double*, double) on sm_60+, and redefining it is a compile error (adjust to your server's CUDA setup)
#else
// CUDA: atomicAdd is not defined for doubles
static __inline__ __device__ double atomicAdd(double *address, double val) {
unsigned long long int* address_as_ull = (unsigned long long int*)address;
unsigned long long int old = *address_as_ull, assumed;
if (val==0.0)
return __longlong_as_double(old);
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val +__longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
#endif
#endif
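For context on why this header exists: caffe_gpu_interp2_kernel_backward in interp.cu (section 1.6) has many threads accumulating weighted gradients into overlapping input locations, so it calls atomicAdd on Dtype, and for Dtype = double on pre-sm_60 GPUs CUDA offers no native overload. A minimal toy program (hypothetical, not one of the files in this guide) showing the same accumulation pattern:
// toy_atomic.cu -- hypothetical illustration only.
#include <cstdio>
#include <cuda_runtime.h>
#include "caffe/common.cuh"  // supplies atomicAdd(double*, double) pre-sm_60

__global__ void accumulate(double* sum, const double* vals, const int n) {
  const int i = threadIdx.x + blockIdx.x * blockDim.x;
  // Many threads may hit the same address, just as the interp backward
  // kernel accumulates into overlapping input pixels; atomicAdd keeps
  // the sum race-free.
  if (i < n) atomicAdd(sum, vals[i]);
}

int main() {
  const int n = 1024;
  double *sum, *vals;
  cudaMallocManaged(&sum, sizeof(double));
  cudaMallocManaged(&vals, n * sizeof(double));
  *sum = 0.0;
  for (int i = 0; i < n; ++i) vals[i] = 1.0;
  accumulate<<<(n + 255) / 256, 256>>>(sum, vals, n);
  cudaDeviceSynchronize();
  printf("%f\n", *sum);  // expect 1024.000000
  cudaFree(vals);
  cudaFree(sum);
  return 0;
}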
1.4 Create interp_layer.cpp in the caffe/src/caffe/layers folder with the following code:
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/interp.hpp"
#include "caffe/layers/interp_layer.hpp"
namespace caffe {
template <typename Dtype>
void InterpLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
InterpParameter interp_param = this->layer_param_.interp_param();
pad_beg_ = interp_param.pad_beg();
pad_end_ = interp_param.pad_end();
CHECK_LE(pad_beg_, 0) << "Only supports non-pos padding (cropping) for now";
CHECK_LE(pad_end_, 0) << "Only supports non-pos padding (cropping) for now";
}
template <typename Dtype>
void InterpLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
num_ = bottom[0]->num();
channels_ = bottom[0]->channels();
height_in_ = bottom[0]->height();
width_in_ = bottom[0]->width();
height_in_eff_ = height_in_ + pad_beg_ + pad_end_;
width_in_eff_ = width_in_ + pad_beg_ + pad_end_;
InterpParameter interp_param = this->layer_param_.interp_param();
if (interp_param.has_shrink_factor() &&
!interp_param.has_zoom_factor()) {
const int shrink_factor = interp_param.shrink_factor();
CHECK_GE(shrink_factor, 1) << "Shrink factor must be positive";
height_out_ = (height_in_eff_ - 1) / shrink_factor + 1;
width_out_ = (width_in_eff_ - 1) / shrink_factor + 1;
} else if (interp_param.has_zoom_factor() &&
!interp_param.has_shrink_factor()) {
const int zoom_factor = interp_param.zoom_factor();
CHECK_GE(zoom_factor, 1) << "Zoom factor must be positive";
height_out_ = height_in_eff_ + (height_in_eff_ - 1) * (zoom_factor - 1);
width_out_ = width_in_eff_ + (width_in_eff_ - 1) * (zoom_factor - 1);
} else if (interp_param.has_height() && interp_param.has_width()) {
height_out_ = interp_param.height();
width_out_ = interp_param.width();
} else if (interp_param.has_shrink_factor() &&
interp_param.has_zoom_factor()) {
const int shrink_factor = interp_param.shrink_factor();
const int zoom_factor = interp_param.zoom_factor();
CHECK_GE(shrink_factor, 1) << "Shrink factor must be positive";
CHECK_GE(zoom_factor, 1) << "Zoom factor must be positive";
height_out_ = (height_in_eff_ - 1) / shrink_factor + 1;
width_out_ = (width_in_eff_ - 1) / shrink_factor + 1;
height_out_ = height_out_ + (height_out_ - 1) * (zoom_factor - 1);
width_out_ = width_out_ + (width_out_ - 1) * (zoom_factor - 1);
} else {
LOG(FATAL) << "Interp layer needs shrink_factor, zoom_factor, or height/width";
}
CHECK_GT(height_in_eff_, 0) << "height should be positive";
CHECK_GT(width_in_eff_, 0) << "width should be positive";
CHECK_GT(height_out_, 0) << "height should be positive";
CHECK_GT(width_out_, 0) << "width should be positive";
top[0]->Reshape(num_, channels_, height_out_, width_out_);
}
template <typename Dtype>
void InterpLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
caffe_cpu_interp2<Dtype,false>(num_ * channels_,
bottom[0]->cpu_data(), - pad_beg_, - pad_beg_, height_in_eff_, width_in_eff_, height_in_, width_in_,
top[0]->mutable_cpu_data(), 0, 0, height_out_, width_out_, height_out_, width_out_);
}
template <typename Dtype>
void InterpLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (!propagate_down[0]) { return; }
caffe_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_cpu_diff());
caffe_cpu_interp2_backward<Dtype,false>(num_ * channels_,
bottom[0]->mutable_cpu_diff(), - pad_beg_, - pad_beg_, height_in_eff_, width_in_eff_, height_in_, width_in_,
top[0]->cpu_diff(), 0, 0, height_out_, width_out_, height_out_, width_out_);
}
#ifndef CPU_ONLY
template <typename Dtype>
void InterpLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
caffe_gpu_interp2<Dtype,false>(num_ * channels_,
bottom[0]->gpu_data(), - pad_beg_, - pad_beg_, height_in_eff_, width_in_eff_, height_in_, width_in_,
top[0]->mutable_gpu_data(), 0, 0, height_out_, width_out_, height_out_, width_out_);
}
template <typename Dtype>
void InterpLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (!propagate_down[0]) { return; }
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
caffe_gpu_interp2_backward<Dtype,false>(num_ * channels_,
bottom[0]->mutable_gpu_diff(), - pad_beg_, - pad_beg_, height_in_eff_, width_in_eff_, height_in_, width_in_,
top[0]->gpu_diff(), 0, 0, height_out_, width_out_, height_out_, width_out_);
}
#endif
#ifdef CPU_ONLY
STUB_GPU(InterpLayer);
#endif
INSTANTIATE_CLASS(InterpLayer);
REGISTER_LAYER_CLASS(Interp);
} // namespace caffe
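To make the Reshape arithmetic above concrete, here is a small standalone sketch (a hypothetical helper, mirroring the shrink-then-zoom branch) of how an output dimension is derived:
#include <cassert>
#include <cstdio>

// Hypothetical helper mirroring InterpLayer<Dtype>::Reshape for the case
// where both shrink_factor and zoom_factor are set.
int interp_out_dim(const int in_dim, const int shrink_factor, const int zoom_factor) {
  assert(shrink_factor >= 1 && zoom_factor >= 1);
  int out = (in_dim - 1) / shrink_factor + 1;  // shrink first
  out += (out - 1) * (zoom_factor - 1);        // then zoom
  return out;
}

int main() {
  // e.g. a 65x65 input with shrink_factor: 4 and zoom_factor: 3
  // shrinks to 17x17, then zooms to 49x49.
  printf("%d\n", interp_out_dim(65, 4, 3));  // prints 49
  return 0;
}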
1.5 Create interp.cpp in the caffe/src/caffe/util folder with the following code:
#include "caffe/common.hpp"
#include "caffe/util/interp.hpp"
#include <algorithm>
#include <cmath>
namespace caffe {
// Bi-linear interpolation
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
template <typename Dtype, bool packed>
void caffe_cpu_interp2(const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
// special case: just copy
if (height1 == height2 && width1 == width2) {
for (int h2 = 0; h2 < height2; ++h2) {
const int h1 = h2;
for (int w2 = 0; w2 < width2; ++w2) {
const int w1 = w2;
if (packed) {
const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
return;
}
const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
for (int h2 = 0; h2 < height2; ++h2) {
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Dtype h1lambda = h1r - h1;
const Dtype h0lambda = Dtype(1.) - h1lambda;
for (int w2 = 0; w2 < width2; ++w2) {
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Dtype w1lambda = w1r - w1;
const Dtype w0lambda = Dtype(1.) - w1lambda;
if (packed) {
const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[channels * w1p]) +
h1lambda * (w0lambda * pos1[channels * h1p * Width1] + w1lambda * pos1[channels * (h1p * Width1 + w1p)]);
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
h1lambda * (w0lambda * pos1[h1p * Width1] + w1lambda * pos1[h1p * Width1 + w1p]);
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
}
// Backward (adjoint) operation 1 <- 2 (accumulates)
template <typename Dtype, bool packed>
void caffe_cpu_interp2_backward(const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
// special case: same-size matching grids
if (height1 == height2 && width1 == width2) {
for (int h2 = 0; h2 < height2; ++h2) {
const int h1 = h2;
for (int w2 = 0; w2 < width2; ++w2) {
const int w1 = w2;
if (packed) {
Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1++;
pos2++;
}
}
else {
Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
return;
}
const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
for (int h2 = 0; h2 < height2; ++h2) {
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Dtype h1lambda = h1r - h1;
const Dtype h0lambda = Dtype(1.) - h1lambda;
for (int w2 = 0; w2 < width2; ++w2) {
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Dtype w1lambda = w1r - w1;
const Dtype w0lambda = Dtype(1.) - w1lambda;
if (packed) {
Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos1[0] += h0lambda * w0lambda * pos2[0];
pos1[channels * w1p] += h0lambda * w1lambda * pos2[0];
pos1[channels * h1p * Width1] += h1lambda * w0lambda * pos2[0];
pos1[channels * (h1p * Width1 + w1p)] += h1lambda * w1lambda * pos2[0];
pos1++;
pos2++;
}
}
else {
Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos1[0] += h0lambda * w0lambda * pos2[0];
pos1[w1p] += h0lambda * w1lambda * pos2[0];
pos1[h1p * Width1] += h1lambda * w0lambda * pos2[0];
pos1[h1p * Width1 + w1p] += h1lambda * w1lambda * pos2[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
}
// Create Gaussian pyramid of an image. Assume output space is pre-allocated.
// IN : [channels height width]
template <typename Dtype, bool packed>
void caffe_cpu_pyramid2(const int channels,
const Dtype *data, const int height, const int width,
Dtype *data_pyr, const int levels) {
CHECK(height > 0 && width > 0 && levels >= 0);
int height1 = height, width1 = width;
int height2 = height, width2 = width;
const Dtype *data1 = data;
Dtype *data2 = data_pyr;
for (int l = 0; l < levels; ++l) {
height2 /= 2;
width2 /= 2;
if (height2 == 0 || width2 == 0) {
break;
}
for (int h2 = 0; h2 < height2; ++h2) {
const int h1 = 2 * h2;
for (int w2 = 0; w2 < width2; ++w2) {
const int w1 = 2 * w2;
if (packed) {
const Dtype* pos1 = &data1[channels * (h1 * width1 + w1)];
Dtype* pos2 = &data2[channels * (h2 * width2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] = static_cast<Dtype>(.25) *
(pos1[0] + pos1[channels] +
pos1[channels * width1] + pos1[channels * (width1 + 1)]);
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[h1 * width1 + w1];
Dtype* pos2 = &data2[h2 * width2 + w2];
for (int c = 0; c < channels; ++c) {
pos2[0] = static_cast<Dtype>(.25) *
(pos1[0] + pos1[1] +
pos1[width1] + pos1[width1 + 1]);
pos1 += width1 * height1;
pos2 += width2 * height2;
}
}
}
}
data1 = data2;
height1 = height2;
width1 = width2;
data2 += channels * height2 * width2;
}
}
/*
template <typename Dtype, bool packed>
void caffe_cpu_mosaic(const int channels,
const Dtype *data1, const MosaicParameter mosaic_params1,
const Dtype *data_pyr, const int levels,
Dtype *data2, const MosaicParameter mosaic_params2) {
const int num1 = mosaic_params1.rects_size();
const int num2 = mosaic_params2.rects_size();
CHECK(num1 == num2 || (num1 == 1 && num2 > 1) || (num2 == 1 && num1 > 1));
const int num = std::max(num1, num2);
for (int i = 0; i < num; ++i) {
const Rect rect1 = mosaic_params1.rects((i < num1) ? i : 0);
const Rect rect2 = mosaic_params2.rects((i < num2) ? i : 0);
int level = log2(sqrt((float)rect1.height() * rect1.width() / rect2.height() / rect2.width()));
level = std::max(0, std::min(levels, level));
if (data_pyr == 0 || level == 0) {
caffe_cpu_interp2<Dtype,packed>(channels,
data1, rect1.x(), rect1.y(), rect1.height(), rect1.width(), mosaic_params1.height(), mosaic_params1.width(),
data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
}
else {
const Dtype *data_pyr_l = data_pyr;
int factor = 2;
for (int l = 1; l < level; ++l) {
data_pyr_l += channels * (mosaic_params1.height() / factor) * (mosaic_params1.width() / factor);
factor *= 2;
}
caffe_cpu_interp2<Dtype,packed>(channels,
data_pyr_l, rect1.x() / factor, rect1.y() / factor, rect1.height() / factor, rect1.width() / factor, mosaic_params1.height() / factor, mosaic_params1.width() / factor,
data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
}
}
}
template <typename Dtype, bool packed>
void caffe_gpu_mosaic(const int channels,
const Dtype *data1, const MosaicParameter mosaic_params1,
const Dtype *data_pyr, const int levels,
Dtype *data2, const MosaicParameter mosaic_params2) {
const int num1 = mosaic_params1.rects_size();
const int num2 = mosaic_params2.rects_size();
CHECK(num1 == num2 || (num1 == 1 && num2 > 1) || (num2 == 1 && num1 > 1));
const int num = std::max(num1, num2);
for (int i = 0; i < num; ++i) {
const Rect rect1 = mosaic_params1.rects((i < num1) ? i : 0);
const Rect rect2 = mosaic_params2.rects((i < num2) ? i : 0);
int level = log2(sqrt((float)rect1.height() * rect1.width() / rect2.height() / rect2.width()));
level = std::max(0, std::min(levels, level));
if (data_pyr == 0 || level == 0) {
caffe_gpu_interp2<Dtype,packed>(channels,
data1, rect1.x(), rect1.y(), rect1.height(), rect1.width(), mosaic_params1.height(), mosaic_params1.width(),
data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
}
else {
const Dtype *data_pyr_l = data_pyr;
int factor = 2;
for (int l = 1; l < level; ++l) {
data_pyr_l += channels * (mosaic_params1.height() / factor) * (mosaic_params1.width() / factor);
factor *= 2;
}
caffe_gpu_interp2<Dtype,packed>(channels,
data_pyr_l, rect1.x() / factor, rect1.y() / factor, rect1.height() / factor, rect1.width() / factor, mosaic_params1.height() / factor, mosaic_params1.width() / factor,
data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
}
}
}
*/
// Explicit instances
template void caffe_cpu_interp2<float,false>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<float,true>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<double,false>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<double,true>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2_backward<float,false>(const int, float *, const int, const int, const int, const int, const int, const int, const float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2_backward<double,false>(const int, double *, const int, const int, const int, const int, const int, const int, const double *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_pyramid2<float,false>(const int, const float *, const int, const int, float *, const int);
template void caffe_cpu_pyramid2<float,true>(const int, const float *, const int, const int, float *, const int);
template void caffe_cpu_pyramid2<double,false>(const int, const double *, const int, const int, double *, const int);
template void caffe_cpu_pyramid2<double,true>(const int, const double *, const int, const int, double *, const int);
/*
template void caffe_cpu_mosaic<float,false>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_cpu_mosaic<float,true>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_cpu_mosaic<double,false>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_cpu_mosaic<double,true>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_gpu_mosaic<float,false>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_gpu_mosaic<float,true>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_gpu_mosaic<double,false>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_gpu_mosaic<double,true>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
*/
} // namespace caffe
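Once Caffe is rebuilt with the files above, caffe_cpu_interp2 can be exercised directly as a sanity check. A minimal sketch (my own test program, compiled with the Caffe include path and linked against libcaffe) that upsamples a single-channel 2x2 grid to 4x4:
#include <cstdio>
#include "caffe/util/interp.hpp"

int main() {
  // One channel, no cropping: the sub-window equals the full image,
  // so x = y = 0, height == Height, and width == Width.
  const float in[4] = {0.f, 1.f,
                       2.f, 3.f};
  float out[16];
  caffe::caffe_cpu_interp2<float, false>(1,
      in,  0, 0, 2, 2, 2, 2,
      out, 0, 0, 4, 4, 4, 4);
  // The four input values land on the output corners
  // (out[0] == 0, out[3] == 1, out[12] == 2, out[15] == 3),
  // with bilinear blends in between.
  for (int h = 0; h < 4; ++h) {
    for (int w = 0; w < 4; ++w) printf("%.3f ", out[h * 4 + w]);
    printf("\n");
  }
  return 0;
}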
1.6 Create interp.cu in the caffe/src/caffe/util folder with the following code:
#include "caffe/common.hpp"
#include "caffe/common.cuh"
#include "caffe/util/interp.hpp"
namespace caffe {
// Bi-linear interpolation
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
template <typename Dtype, bool packed>
__global__ void caffe_gpu_interp2_kernel(const int n, const float rheight, const float rwidth,
const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {
const int w2 = index % width2; // 0:width2-1
const int h2 = index / width2; // 0:height2-1
// special case: just copy
if (height1 == height2 && width1 == width2) {
const int h1 = h2;
const int w1 = w2;
if (packed) {
const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
return;
}
//
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Dtype h1lambda = h1r - h1;
const Dtype h0lambda = Dtype(1.) - h1lambda;
//
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Dtype w1lambda = w1r - w1;
const Dtype w0lambda = Dtype(1.) - w1lambda;
//
if (packed) {
const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[channels * w1p]) +
h1lambda * (w0lambda * pos1[channels * h1p * Width1] + w1lambda * pos1[channels * (h1p * Width1 + w1p)]);
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
h1lambda * (w0lambda * pos1[h1p * Width1] + w1lambda * pos1[h1p * Width1 + w1p]);
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
template <typename Dtype, bool packed>
void caffe_gpu_interp2(const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
const int num_kernels = height2 * width2;
caffe_gpu_interp2_kernel<Dtype,packed><<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>
(num_kernels, rheight, rwidth, channels,
data1, x1, y1, height1, width1, Height1, Width1,
data2, x2, y2, height2, width2, Height2, Width2);
CUDA_POST_KERNEL_CHECK;
}
// Backward (adjoint) operation 1 <- 2 (accumulates)
template <typename Dtype, bool packed>
__global__ void caffe_gpu_interp2_kernel_backward(const int n, const float rheight, const float rwidth,
const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {
const int w2 = index % width2; // 0:width2-1
const int h2 = index / width2; // 0:height2-1
// special case: just copy
if (height1 == height2 && width1 == width2) {
const int h1 = h2;
const int w1 = w2;
if (packed) {
Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1++;
pos2++;
}
}
else {
Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
return;
}
//
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Dtype h1lambda = h1r - h1;
const Dtype h0lambda = Dtype(1.) - h1lambda;
//
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Dtype w1lambda = w1r - w1;
const Dtype w0lambda = Dtype(1.) - w1lambda;
//
if (packed) {
Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
atomicAdd(&pos1[0], h0lambda * w0lambda * pos2[0]);
atomicAdd(&pos1[channels * w1p], h0lambda * w1lambda * pos2[0]);
atomicAdd(&pos1[channels * h1p * Width1], h1lambda * w0lambda * pos2[0]);
atomicAdd(&pos1[channels * (h1p * Width1 + w1p)], h1lambda * w1lambda * pos2[0]);
pos1++;
pos2++;
}
}
else {
Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
atomicAdd(&pos1[0], h0lambda * w0lambda * pos2[0]);
atomicAdd(&pos1[w1p], h0lambda * w1lambda * pos2[0]);
atomicAdd(&pos1[h1p * Width1], h1lambda * w0lambda * pos2[0]);
atomicAdd(&pos1[h1p * Width1 + w1p], h1lambda * w1lambda * pos2[0]);
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
template <typename Dtype, bool packed>
void caffe_gpu_interp2_backward(const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
const int num_kernels = height2 * width2;
caffe_gpu_interp2_kernel_backward<Dtype,packed><<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>
(num_kernels, rheight, rwidth, channels,
data1, x1, y1, height1, width1, Height1, Width1,
data2, x2, y2, height2, width2, Height2, Width2);
CUDA_POST_KERNEL_CHECK;
}
// Create Gaussian pyramid of an image. Assume output space is pre-allocated.
// IN : [channels height width]
template <typename Dtype, bool packed>
__global__ void caffe_gpu_pyramid2_kernel(const int n, const int channels,
const Dtype *data1, const int height1, const int width1,
Dtype *data2, const int height2, const int width2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {
const int w2 = index % width2; // 0:width2-1
const int h2 = index / width2; // 0:height2-1
const int w1 = 2 * w2;
const int h1 = 2 * h2;
if (packed) {
const Dtype* pos1 = &data1[channels * (h1 * width1 + w1)];
Dtype* pos2 = &data2[channels * (h2 * width2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] = static_cast<Dtype>(.25) *
(pos1[0] + pos1[channels] +
pos1[channels * width1] + pos1[channels * (width1 + 1)]);
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[h1 * width1 + w1];
Dtype* pos2 = &data2[h2 * width2 + w2];
for (int c = 0; c < channels; ++c) {
pos2[0] = static_cast<Dtype>(.25) *
(pos1[0] + pos1[1] +
pos1[width1] + pos1[width1 + 1]);
pos1 += width1 * height1;
pos2 += width2 * height2;
}
}
}
}
template <typename Dtype, bool packed>
void caffe_gpu_pyramid2(const int channels,
const Dtype *data, const int height, const int width,
Dtype *data_pyr, const int levels) {
CHECK(height > 0 && width > 0 && levels >= 0);
int height1 = height, width1 = width;
int height2 = height, width2 = width;
const Dtype *data1 = data;
Dtype *data2 = data_pyr;
for (int l = 0; l < levels; ++l) {
height2 /= 2;
width2 /= 2;
if (height2 == 0 || width2 == 0) {
break;
}
const int num_kernels = height2 * width2;
caffe_gpu_pyramid2_kernel<Dtype,packed><<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>
(num_kernels, channels, data1, height1, width1, data2, height2, width2);
CUDA_POST_KERNEL_CHECK;
data1 = data2;
height1 = height2;
width1 = width2;
data2 += channels * height2 * width2;
}
}
// Explicit instances
template void caffe_gpu_interp2<float,false>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<float,true>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<double,false>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<double,true>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2_backward<float,false>(const int, float *, const int, const int, const int, const int, const int, const int, const float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2_backward<double,false>(const int, double *, const int, const int, const int, const int, const int, const int, const double *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_pyramid2<float,false>(const int, const float *, const int, const int, float *, const int);
template void caffe_gpu_pyramid2<float,true>(const int, const float *, const int, const int, float *, const int);
template void caffe_gpu_pyramid2<double,false>(const int, const double *, const int, const int, double *, const int);
template void caffe_gpu_pyramid2<double,true>(const int, const double *, const int, const int, double *, const int);
} // namespace caffe
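Note the launch geometry: each kernel runs one thread per output spatial location (num_kernels = height2 * width2), and every thread then loops over all planes. This is why InterpLayer passes num_ * channels_ as the channels argument: the batch and channel dimensions are folded together and walked inside the per-pixel loop.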
1.7 Modify caffe.proto under caffe/src/caffe/proto (note: you are editing the existing file, not creating a new one!). Add an interp_param entry to the LayerParameter message so that it looks like this:
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob
// The train / test phase for computation.
optional Phase phase = 10;
// The amount of weight to assign each top blob in the objective.
// Each layer assigns a default value, usually of either 0 or 1,
// to each top blob.
repeated float loss_weight = 5;
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
repeated ParamSpec param = 6;
// The blobs containing the numeric parameters of the layer.
repeated BlobProto blobs = 7;
// Specifies on which bottoms the backpropagation should be skipped.
// The size must be either 0 or equal to the number of bottoms.
repeated bool propagate_down = 11;
// Rules controlling whether and when a layer is included in the network,
// based on the current NetState. You may specify a non-zero number of rules
// to include OR exclude, but not both. If no include or exclude rules are
// specified, the layer is always included. If the current NetState meets
// ANY (i.e., one or more) of the specified rules, the layer is
// included/excluded.
repeated NetStateRule include = 8;
repeated NetStateRule exclude = 9;
// Parameters for data pre-processing.
optional TransformationParameter transform_param = 100;
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;
// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
// for their implementation. These layers include an Engine type and
// engine parameter for selecting the implementation.
// The default for the engine is set by the ENGINE switch at compile-time.
optional AccuracyParameter accuracy_param = 102;
optional AdaptiveBiasChannelParameter adaptive_bias_channel_param = 148;
optional ArgMaxParameter argmax_param = 103;
optional BatchNormParameter batch_norm_param = 139;
optional BNParameter bn_param = 152;
optional BiasParameter bias_param = 141;
optional BiasChannelParameter bias_channel_param = 149;
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
optional DataParameter data_param = 107;
optional DenseCRFParameter dense_crf_param = 146;
optional DomainTransformParameter domain_transform_param = 147;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
optional ELUParameter elu_param = 140;
optional EmbedParameter embed_param = 137;
optional ExpParameter exp_param = 111;
optional FlattenParameter flatten_param = 135;
optional HDF5DataParameter hdf5_data_param = 112;
optional HDF5OutputParameter hdf5_output_param = 113;
optional HingeLossParameter hinge_loss_param = 114;
optional ImageDataParameter image_data_param = 115;
optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117;
optional InterpParameter interp_param = 143; // the field number must be unique within LayerParameter; 150 is already taken by unique_label_param below, so an unused tag such as 143 is used here -- pick whatever is free in your own proto
optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118;
optional MatReadParameter mat_read_param = 151;
optional MatWriteParameter mat_write_param = 145;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional ScaleParameter scale_param = 142;
optional SegAccuracyParameter seg_accuracy_param = 144;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
optional UniqueLabelParameter unique_label_param = 150;
optional WindowDataParameter window_data_param = 129;
}
Then define the InterpParameter message itself further down in the same file:
message InterpParameter {
optional int32 height = 1 [default = 0]; // Height of output
optional int32 width = 2 [default = 0]; // Width of output
optional int32 zoom_factor = 3 [default = 1]; // zoom factor
optional int32 shrink_factor = 4 [default = 1]; // shrink factor
optional int32 pad_beg = 5 [default = 0]; // padding at begin of input
optional int32 pad_end = 6 [default = 0]; // padding at end of input
}
Step 2: recompile.
2.1 First recompile caffe/src/caffe/proto/caffe.proto:
# check the protobuf version
$ protoc --version
libprotoc 2.5.0
# compile caffe.proto; either cd into caffe/src/caffe/proto first, or pass explicit paths
$ protoc -I=./ --cpp_out=./ ./caffe.proto
# inspect the result
$ ls
caffe.pb.cc caffe.pb.h caffe.proto
2.2 Rebuild Caffe:
# make sure each step succeeds before running the next
make clean
make -j8
make pycaffe
Once the build succeeds, the Interp layer is ready to use.
Step 3: using the Interp layer. A prototxt example:
layer {
  bottom: "input"
  top: "output"
  name: "interp_layer"
  type: "Interp"
  # To pin the feature map to a fixed size instead, use
  # interp_param { height: 60 width: 60 }. Note that at least one of
  # shrink_factor, zoom_factor, or height/width must be set, otherwise
  # Reshape hits LOG(FATAL).
  interp_param {
    shrink_factor: 4
    zoom_factor: 3
    pad_beg: 0
    pad_end: 0
  }
}
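With these settings, a 65x65 bottom blob, for example, first shrinks to (65 - 1)/4 + 1 = 17 and then zooms to 17 + 16 * (3 - 1) = 49, so the top blob comes out 49x49; pad_beg and pad_end of 0 leave the input uncropped.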