天天看點

在Caffe下自定義網絡層 Interp層

       之前跑 Residual Attention Network for Image Classification(2017年CVPR)發現Interp層在Caffe中沒有定義(論文中用來上采樣),于是從網上查找資料自己定義一個Interp層。

第一步先:自定義Interp網絡層:

1.1  在caffe/include/caffe/layers目錄下建立一個檔案 interp_layer.hpp ,檔案内容如下:

#ifndef CAFFE_INTERP_LAYER_HPP_
#define CAFFE_INTERP_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {
/**
 * @brief Changes the spatial resolution by bi-linear interpolation.
 *        The target size is specified in terms of pixels. 
 *        The start and end pixels of the input are mapped to the start
 *        and end pixels of the output.
 */
template <typename Dtype>
class InterpLayer : public Layer<Dtype> {
 public:
  explicit InterpLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Interp"; }
  // Adjust these if the layer should take a different number of input
  // blobs; the default is exactly one bottom and one top.
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  int num_, channels_;                // batch size and channel count
  int height_in_, width_in_;          // raw input spatial size
  int height_out_, width_out_;        // computed output spatial size
  int pad_beg_, pad_end_;             // non-positive padding (i.e. cropping)
  int height_in_eff_, width_in_eff_;  // input size after applying the padding
};

}  // namespace caffe

#endif  // CAFFE_INTERP_LAYER_HPP_
           

1.2  在caffe/include/caffe/util檔案夾中建立一個interp.hpp,代碼如下:

#ifndef CAFFE_UTIL_INTERP_H_
#define CAFFE_UTIL_INTERP_H_

#include <cublas_v2.h>
#include "caffe/proto/caffe.pb.h"

namespace caffe {

// Bi-linear interpolation
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
// The "packed" template flag selects interleaved (HWC) addressing; otherwise
// the data is addressed as planar (CHW) with Height*Width plane strides.

template <typename Dtype, bool packed>
void caffe_cpu_interp2(const int channels,
    const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
          Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);

// GPU counterpart of caffe_cpu_interp2.
template <typename Dtype, bool packed>
void caffe_gpu_interp2(const int channels,
    const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
          Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);

// Backward (adjoint) operation: accumulates gradients from data2 into data1.
template <typename Dtype, bool packed>
void caffe_cpu_interp2_backward(const int channels,
      Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
    const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);

// GPU counterpart of caffe_cpu_interp2_backward.
template <typename Dtype, bool packed>
void caffe_gpu_interp2_backward(const int channels,
      Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
    const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);

// Create Gaussian pyramid of an image. Assume output space is pre-allocated.
// IN : [channels height width]
template <typename Dtype, bool packed>
void caffe_cpu_pyramid2(const int channels,
    const Dtype *data, const int height, const int width,
    Dtype *data_pyr, const int levels);

// GPU counterpart of caffe_cpu_pyramid2.
template <typename Dtype, bool packed>
void caffe_gpu_pyramid2(const int channels,
    const Dtype *data, const int height, const int width,
    Dtype *data_pyr, const int levels);

  /*
template <typename Dtype, bool packed>
void caffe_cpu_mosaic(const int channels,
    const Dtype *data1, const MosaicParameter mosaic_params1,
    const Dtype *data_pyr, const int levels,
          Dtype *data2, const MosaicParameter mosaic_params2);
template <typename Dtype, bool packed>
void caffe_gpu_mosaic(const int channels,
    const Dtype *data1, const MosaicParameter mosaic_params1,
    const Dtype *data_pyr, const int levels,
          Dtype *data2, const MosaicParameter mosaic_params2);
  */

}  // namespace caffe

#endif  // CAFFE_UTIL_INTERP_H_
           

1.3 在caffe/include/caffe檔案夾中建立 common.cuh,代碼如下:

#ifndef CAFFE_COMMON_CUH_
#define CAFFE_COMMON_CUH_

#include <cuda.h>

// NOTE: the guard below is required: CUDA already provides a built-in
// double-precision atomicAdd for compute capability >= 6.0, and defining a
// second one causes a compile error. Adjust to match your server's CUDA
// configuration if needed.
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600

#else
// CUDA: atomicAdd is not defined for doubles
static __inline__ __device__ double atomicAdd(double *address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;
  // Adding zero cannot change the stored value; skip the CAS loop entirely.
  if (val==0.0)
    return __longlong_as_double(old);
  do {
    assumed = old;
    // Retry until no other thread modified the value between read and CAS.
    old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val +__longlong_as_double(assumed)));
  } while (assumed != old);
  // Return the previous value, matching the built-in atomicAdd contract.
  return __longlong_as_double(old);
}

#endif
#endif  // CAFFE_COMMON_CUH_
           

1.4 在caffe/src/caffe/layers檔案夾下面建立 interp_layer.cpp,代碼如下:

#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/interp.hpp"
#include "caffe/layers/interp_layer.hpp"

namespace caffe {

template <typename Dtype>
void InterpLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Cache the (optional) padding amounts once at setup time; both must be
  // non-positive, i.e. the layer only supports cropping, not true padding.
  const InterpParameter& param = this->layer_param_.interp_param();
  pad_beg_ = param.pad_beg();
  pad_end_ = param.pad_end();
  CHECK_LE(pad_beg_, 0) << "Only supports non-pos padding (cropping) for now";
  CHECK_LE(pad_end_, 0) << "Only supports non-pos padding (cropping) for now";
}

// Computes the output size from the configured shrink/zoom factors or an
// explicit height/width, then reshapes top[0] to
// [num, channels, height_out_, width_out_].
template <typename Dtype>
void InterpLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Cache input geometry; pad_beg_/pad_end_ are <= 0 (cropping), so the
  // effective input size may be smaller than the raw input size.
  num_ = bottom[0]->num();
  channels_ = bottom[0]->channels();
  height_in_ = bottom[0]->height();
  width_in_ = bottom[0]->width();
  height_in_eff_ = height_in_ + pad_beg_ + pad_end_;
  width_in_eff_ = width_in_ + pad_beg_ + pad_end_;
  const InterpParameter& interp_param = this->layer_param_.interp_param();
  if (interp_param.has_shrink_factor() &&
      !interp_param.has_zoom_factor()) {
    // Shrink only: keep every shrink_factor-th sample (plus the last one).
    const int shrink_factor = interp_param.shrink_factor();
    CHECK_GE(shrink_factor, 1) << "Shrink factor must be positive";
    height_out_ = (height_in_eff_ - 1) / shrink_factor + 1;
    width_out_ = (width_in_eff_ - 1) / shrink_factor + 1;
  } else if (interp_param.has_zoom_factor() &&
             !interp_param.has_shrink_factor()) {
    // Zoom only: insert (zoom_factor - 1) interpolated samples between
    // every pair of neighboring input samples.
    const int zoom_factor = interp_param.zoom_factor();
    CHECK_GE(zoom_factor, 1) << "Zoom factor must be positive";
    height_out_ = height_in_eff_ + (height_in_eff_ - 1) * (zoom_factor - 1);
    width_out_ = width_in_eff_ + (width_in_eff_ - 1) * (zoom_factor - 1);
  } else if (interp_param.has_height() && interp_param.has_width()) {
    // Explicit output size.
    height_out_  = interp_param.height();
    width_out_  = interp_param.width();
  } else if (interp_param.has_shrink_factor() &&
             interp_param.has_zoom_factor()) {
    // Both given: shrink first, then zoom the shrunken size.
    const int shrink_factor = interp_param.shrink_factor();
    const int zoom_factor = interp_param.zoom_factor();
    CHECK_GE(shrink_factor, 1) << "Shrink factor must be positive";
    CHECK_GE(zoom_factor, 1) << "Zoom factor must be positive";
    height_out_ = (height_in_eff_ - 1) / shrink_factor + 1;
    width_out_ = (width_in_eff_ - 1) / shrink_factor + 1;
    height_out_ = height_out_ + (height_out_ - 1) * (zoom_factor - 1);
    width_out_ = width_out_ + (width_out_ - 1) * (zoom_factor - 1);
  } else {
    // Fix: the original bare LOG(FATAL) aborted with no message, which made
    // a misconfigured prototxt very hard to diagnose.
    LOG(FATAL) << "Interp layer requires shrink_factor, zoom_factor, or "
               << "both height and width to be specified";
  }
  CHECK_GT(height_in_eff_, 0) << "height should be positive";
  CHECK_GT(width_in_eff_, 0) << "width should be positive";
  CHECK_GT(height_out_, 0) << "height should be positive";
  CHECK_GT(width_out_, 0) << "width should be positive";
  top[0]->Reshape(num_, channels_, height_out_, width_out_);
}

template <typename Dtype>
void InterpLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Interpolate the effective (cropped) input window onto the full output.
  // pad_beg_ is <= 0, so -pad_beg_ is the non-negative crop offset.
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  caffe_cpu_interp2<Dtype,false>(num_ * channels_,
      bottom_data, -pad_beg_, -pad_beg_, height_in_eff_, width_in_eff_,
      height_in_, width_in_,
      top_data, 0, 0, height_out_, width_out_, height_out_, width_out_);
}

template <typename Dtype>
void InterpLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  // The adjoint accumulates, so the input diff must be zeroed first.
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
  caffe_cpu_interp2_backward<Dtype,false>(num_ * channels_,
      bottom_diff, -pad_beg_, -pad_beg_, height_in_eff_, width_in_eff_,
      height_in_, width_in_,
      top[0]->cpu_diff(), 0, 0, height_out_, width_out_,
      height_out_, width_out_);
}

#ifndef CPU_ONLY
template <typename Dtype>
void InterpLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // GPU mirror of Forward_cpu: interpolate the cropped input window onto
  // the full output blob.
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  caffe_gpu_interp2<Dtype,false>(num_ * channels_,
      bottom_data, -pad_beg_, -pad_beg_, height_in_eff_, width_in_eff_,
      height_in_, width_in_,
      top_data, 0, 0, height_out_, width_out_, height_out_, width_out_);
}

template <typename Dtype>
void InterpLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  // Zero the input diff before the accumulating adjoint pass.
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom_diff);
  caffe_gpu_interp2_backward<Dtype,false>(num_ * channels_,
      bottom_diff, -pad_beg_, -pad_beg_, height_in_eff_, width_in_eff_,
      height_in_, width_in_,
      top[0]->gpu_diff(), 0, 0, height_out_, width_out_,
      height_out_, width_out_);
}
#endif

#ifdef CPU_ONLY
STUB_GPU(InterpLayer);
#endif

// Instantiate the float/double variants and register the layer so that
// "Interp" layers in a prototxt resolve to this class.
INSTANTIATE_CLASS(InterpLayer);
REGISTER_LAYER_CLASS(Interp);

} // namespace caffe
           

1.5 在caffe/src/caffe/util檔案夾中建立 interp.cpp,代碼如下:

#include "caffe/common.hpp"
#include "caffe/util/interp.hpp"
#include <algorithm>
#include <cmath>

namespace caffe {

// Bi-linear interpolation.
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
// "packed" selects interleaved (HWC) addressing; otherwise planar (CHW).
template <typename Dtype, bool packed>
void caffe_cpu_interp2(const int channels,
    const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
    Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
  CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
  CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
  // Fast path: identical spatial sizes reduce to a plain copy.
  if (height1 == height2 && width1 == width2) {
    for (int h = 0; h < height2; ++h) {
      for (int w = 0; w < width2; ++w) {
        if (packed) {
          const Dtype* src = &data1[channels * ((y1 + h) * Width1 + (x1 + w))];
          Dtype* dst = &data2[channels * ((y2 + h) * Width2 + (x2 + w))];
          for (int c = 0; c < channels; ++c) {
            *dst++ = *src++;
          }
        } else {
          const Dtype* src = &data1[(y1 + h) * Width1 + (x1 + w)];
          Dtype* dst = &data2[(y2 + h) * Width2 + (x2 + w)];
          for (int c = 0; c < channels; ++c) {
            *dst = *src;
            src += Width1 * Height1;
            dst += Width2 * Height2;
          }
        }
      }
    }
    return;
  }
  // Scale factors mapping output coordinates back onto the input grid
  // (align-corners convention: first/last pixels map to first/last pixels).
  const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
  const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
  for (int h2 = 0; h2 < height2; ++h2) {
    const float h1r = rheight * h2;
    const int h1 = h1r;                          // floor: top source row
    const int h1p = (h1 < height1 - 1) ? 1 : 0;  // row step, 0 at bottom edge
    const Dtype h1lambda = h1r - h1;             // vertical blend weight
    const Dtype h0lambda = Dtype(1.) - h1lambda;
    for (int w2 = 0; w2 < width2; ++w2) {
      const float w1r = rwidth * w2;
      const int w1 = w1r;                        // floor: left source column
      const int w1p = (w1 < width1 - 1) ? 1 : 0; // col step, 0 at right edge
      const Dtype w1lambda = w1r - w1;           // horizontal blend weight
      const Dtype w0lambda = Dtype(1.) - w1lambda;
      if (packed) {
        const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
        Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
        for (int c = 0; c < channels; ++c) {
          // Weighted sum of the 2x2 source neighborhood.
          pos2[0] =
              h0lambda * (w0lambda * pos1[0] +
                          w1lambda * pos1[channels * w1p]) +
              h1lambda * (w0lambda * pos1[channels * h1p * Width1] +
                          w1lambda * pos1[channels * (h1p * Width1 + w1p)]);
          pos1++;
          pos2++;
        }
      } else {
        const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
        Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
        for (int c = 0; c < channels; ++c) {
          // Weighted sum of the 2x2 source neighborhood.
          pos2[0] =
              h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
              h1lambda * (w0lambda * pos1[h1p * Width1] +
                          w1lambda * pos1[h1p * Width1 + w1p]);
          pos1 += Width1 * Height1;
          pos2 += Width2 * Height2;
        }
      }
    }
  }
}


// Backward (adjoint) operation 1 <- 2 (accumulates): each output gradient is
// scattered back to the four source pixels with the same bilinear weights
// that were used in the forward pass.
template <typename Dtype, bool packed>
void caffe_cpu_interp2_backward(const int channels,
    Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
    const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
  CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
  CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
  // Fast path: the adjoint of a same-size copy is plain accumulation.
  if (height1 == height2 && width1 == width2) {
    for (int h = 0; h < height2; ++h) {
      for (int w = 0; w < width2; ++w) {
        if (packed) {
          Dtype* dst = &data1[channels * ((y1 + h) * Width1 + (x1 + w))];
          const Dtype* src = &data2[channels * ((y2 + h) * Width2 + (x2 + w))];
          for (int c = 0; c < channels; ++c) {
            *dst++ += *src++;
          }
        } else {
          Dtype* dst = &data1[(y1 + h) * Width1 + (x1 + w)];
          const Dtype* src = &data2[(y2 + h) * Width2 + (x2 + w)];
          for (int c = 0; c < channels; ++c) {
            *dst += *src;
            dst += Width1 * Height1;
            src += Width2 * Height2;
          }
        }
      }
    }
    return;
  }
  // Same coordinate mapping as the forward pass (align-corners).
  const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
  const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
  for (int h2 = 0; h2 < height2; ++h2) {
    const float h1r = rheight * h2;
    const int h1 = h1r;
    const int h1p = (h1 < height1 - 1) ? 1 : 0;  // 0 keeps edge taps in bounds
    const Dtype h1lambda = h1r - h1;
    const Dtype h0lambda = Dtype(1.) - h1lambda;
    for (int w2 = 0; w2 < width2; ++w2) {
      const float w1r = rwidth * w2;
      const int w1 = w1r;
      const int w1p = (w1 < width1 - 1) ? 1 : 0;
      const Dtype w1lambda = w1r - w1;
      const Dtype w0lambda = Dtype(1.) - w1lambda;
      if (packed) {
        Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
        const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
        for (int c = 0; c < channels; ++c) {
          // Scatter-add into the 2x2 source neighborhood.
          pos1[0] += h0lambda * w0lambda * pos2[0];
          pos1[channels * w1p] += h0lambda * w1lambda * pos2[0];
          pos1[channels * h1p * Width1] += h1lambda * w0lambda * pos2[0];
          pos1[channels * (h1p * Width1 + w1p)] += h1lambda * w1lambda * pos2[0];
          pos1++;
          pos2++;
        }
      } else {
        Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
        const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
        for (int c = 0; c < channels; ++c) {
          // Scatter-add into the 2x2 source neighborhood.
          pos1[0] += h0lambda * w0lambda * pos2[0];
          pos1[w1p] += h0lambda * w1lambda * pos2[0];
          pos1[h1p * Width1] += h1lambda * w0lambda * pos2[0];
          pos1[h1p * Width1 + w1p] += h1lambda * w1lambda * pos2[0];
          pos1 += Width1 * Height1;
          pos2 += Width2 * Height2;
        }
      }
    }
  }
}

// Create a pyramid of an image by repeated 2x2 box averaging (each level is
// the mean of non-overlapping 2x2 blocks of the previous level).
// Assume output space is pre-allocated.
// IN : [channels height width]
template <typename Dtype, bool packed>
void caffe_cpu_pyramid2(const int channels,
    const Dtype *data, const int height, const int width,
    Dtype *data_pyr, const int levels) {
  CHECK(height > 0 && width > 0 && levels >= 0);
  int src_h = height, src_w = width;
  int dst_h = height, dst_w = width;
  const Dtype *level_src = data;      // level 0 reads the original image
  Dtype *level_dst = data_pyr;        // levels 1..n are written here in order
  for (int level = 0; level < levels; ++level) {
    dst_h /= 2;
    dst_w /= 2;
    // Stop once either dimension has shrunk to nothing.
    if (dst_h == 0 || dst_w == 0) {
      break;
    }
    for (int dh = 0; dh < dst_h; ++dh) {
      const int sh = 2 * dh;
      for (int dw = 0; dw < dst_w; ++dw) {
        const int sw = 2 * dw;
        if (packed) {
          // Interleaved (HWC) layout: channel stride is 1.
          const Dtype* src = &level_src[channels * (sh * src_w + sw)];
          Dtype* dst = &level_dst[channels * (dh * dst_w + dw)];
          for (int ch = 0; ch < channels; ++ch) {
            dst[0] = static_cast<Dtype>(.25) *
                (src[0] + src[channels] +
                 src[channels * src_w] + src[channels * (src_w + 1)]);
            src++;
            dst++;
          }
        } else {
          // Planar (CHW) layout: channel stride is one full plane.
          const Dtype* src = &level_src[sh * src_w + sw];
          Dtype* dst = &level_dst[dh * dst_w + dw];
          for (int ch = 0; ch < channels; ++ch) {
            dst[0] = static_cast<Dtype>(.25) *
                (src[0] + src[1] +
                 src[src_w] + src[src_w + 1]);
            src += src_w * src_h;
            dst += dst_w * dst_h;
          }
        }
      }
    }
    // The next level reads from the level just produced.
    level_src = level_dst;
    src_h = dst_h;
    src_w = dst_w;
    level_dst += channels * dst_h * dst_w;
  }
}

  /*
template <typename Dtype, bool packed>
void caffe_cpu_mosaic(const int channels,
    const Dtype *data1, const MosaicParameter mosaic_params1,
    const Dtype *data_pyr, const int levels,
          Dtype *data2, const MosaicParameter mosaic_params2) {
  const int num1 = mosaic_params1.rects_size();
  const int num2 = mosaic_params2.rects_size();
  CHECK(num1 == num2 || (num1 == 1 && num2 > 1) || (num2 == 1 && num1 > 1));
  const int num = std::max(num1, num2);
  for (int i = 0; i < num; ++i) {
    const Rect rect1 = mosaic_params1.rects((i < num1) ? i : 0);
    const Rect rect2 = mosaic_params2.rects((i < num2) ? i : 0);
    int level = log2(sqrt((float)rect1.height() * rect1.width() / rect2.height() / rect2.width()));
    level = std::max(0, std::min(levels, level));
    if (data_pyr == 0 || level == 0) {
      caffe_cpu_interp2<Dtype,packed>(channels,
      data1, rect1.x(), rect1.y(), rect1.height(), rect1.width(), mosaic_params1.height(), mosaic_params1.width(),
      data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
    }
    else {
      const Dtype *data_pyr_l = data_pyr;
      int factor = 2;
      for (int l = 1; l < level; ++l) {
    data_pyr_l += channels * (mosaic_params1.height() / factor) * (mosaic_params1.width() / factor);
    factor *= 2;
      }
      caffe_cpu_interp2<Dtype,packed>(channels,
      data_pyr_l, rect1.x() / factor, rect1.y() / factor, rect1.height() / factor, rect1.width() / factor, mosaic_params1.height() / factor, mosaic_params1.width() / factor,
      data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());      
    }
  }
}
template <typename Dtype, bool packed>
void caffe_gpu_mosaic(const int channels,
    const Dtype *data1, const MosaicParameter mosaic_params1,
    const Dtype *data_pyr, const int levels,
          Dtype *data2, const MosaicParameter mosaic_params2) {
  const int num1 = mosaic_params1.rects_size();
  const int num2 = mosaic_params2.rects_size();
  CHECK(num1 == num2 || (num1 == 1 && num2 > 1) || (num2 == 1 && num1 > 1));
  const int num = std::max(num1, num2);
  for (int i = 0; i < num; ++i) {
    const Rect rect1 = mosaic_params1.rects((i < num1) ? i : 0);
    const Rect rect2 = mosaic_params2.rects((i < num2) ? i : 0);
    int level = log2(sqrt((float)rect1.height() * rect1.width() / rect2.height() / rect2.width()));
    level = std::max(0, std::min(levels, level));
    if (data_pyr == 0 || level == 0) {
      caffe_gpu_interp2<Dtype,packed>(channels,
      data1, rect1.x(), rect1.y(), rect1.height(), rect1.width(), mosaic_params1.height(), mosaic_params1.width(),
      data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
    }
    else {
      const Dtype *data_pyr_l = data_pyr;
      int factor = 2;
      for (int l = 1; l < level; ++l) {
    data_pyr_l += channels * (mosaic_params1.height() / factor) * (mosaic_params1.width() / factor);
    factor *= 2;
      }
      caffe_gpu_interp2<Dtype,packed>(channels,
      data_pyr_l, rect1.x() / factor, rect1.y() / factor, rect1.height() / factor, rect1.width() / factor, mosaic_params1.height() / factor, mosaic_params1.width() / factor,
      data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());      
    }
  }
}
  */

// Explicit instances: the forward interpolation and the pyramid are provided
// for float/double in both planar (packed=false) and interleaved (packed=true)
// layouts; the backward pass is only instantiated for the planar layout,
// which is the one InterpLayer uses.
template void caffe_cpu_interp2<float,false>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<float,true>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<double,false>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<double,true>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);

template void caffe_cpu_interp2_backward<float,false>(const int, float *, const int, const int, const int, const int, const int, const int, const float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2_backward<double,false>(const int, double *, const int, const int, const int, const int, const int, const int, const double *, const int, const int, const int, const int, const int, const int);

template void caffe_cpu_pyramid2<float,false>(const int, const float *, const int, const int, float *, const int);
template void caffe_cpu_pyramid2<float,true>(const int, const float *, const int, const int, float *, const int);
template void caffe_cpu_pyramid2<double,false>(const int, const double *, const int, const int, double *, const int);
template void caffe_cpu_pyramid2<double,true>(const int, const double *, const int, const int, double *, const int);

  /*
template void caffe_cpu_mosaic<float,false>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_cpu_mosaic<float,true>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_cpu_mosaic<double,false>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_cpu_mosaic<double,true>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_gpu_mosaic<float,false>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_gpu_mosaic<float,true>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_gpu_mosaic<double,false>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_gpu_mosaic<double,true>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
  */

} // namespace caffe
           

1.6 在caffe/src/caffe/util檔案夾中建立 interp.cu,代碼如下:

#include "caffe/common.hpp"
#include "caffe/common.cuh"
#include "caffe/util/interp.hpp"

namespace caffe {

// Bi-linear interpolation (forward), one thread per output pixel.
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
template <typename Dtype, bool packed>
__global__ void caffe_gpu_interp2_kernel(const int n, const float rheight, const float rwidth,
    const int channels,
    const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
    Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
  const int index = threadIdx.x + blockIdx.x * blockDim.x;
  if (index >= n) return;               // guard against the trailing partial block
  const int w2 = index % width2;        // output column, 0:width2-1
  const int h2 = index / width2;        // output row, 0:height2-1
  // Fast path: identical spatial sizes reduce to a plain copy.
  if (height1 == height2 && width1 == width2) {
    if (packed) {
      const Dtype* src = &data1[channels * ((y1 + h2) * Width1 + (x1 + w2))];
      Dtype* dst = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
      for (int c = 0; c < channels; ++c) {
        *dst++ = *src++;
      }
    } else {
      const Dtype* src = &data1[(y1 + h2) * Width1 + (x1 + w2)];
      Dtype* dst = &data2[(y2 + h2) * Width2 + (x2 + w2)];
      for (int c = 0; c < channels; ++c) {
        *dst = *src;
        src += Width1 * Height1;
        dst += Width2 * Height2;
      }
    }
    return;
  }
  // Source coordinates and bilinear blend weights for this output pixel.
  const float h1r = rheight * h2;
  const int h1 = h1r;
  const int h1p = (h1 < height1 - 1) ? 1 : 0;  // row step, 0 at bottom edge
  const Dtype h1lambda = h1r - h1;
  const Dtype h0lambda = Dtype(1.) - h1lambda;
  const float w1r = rwidth * w2;
  const int w1 = w1r;
  const int w1p = (w1 < width1 - 1) ? 1 : 0;   // col step, 0 at right edge
  const Dtype w1lambda = w1r - w1;
  const Dtype w0lambda = Dtype(1.) - w1lambda;
  if (packed) {
    const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
    Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
    for (int c = 0; c < channels; ++c) {
      // Weighted sum of the 2x2 source neighborhood (interleaved layout).
      pos2[0] =
          h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[channels * w1p]) +
          h1lambda * (w0lambda * pos1[channels * h1p * Width1] +
                      w1lambda * pos1[channels * (h1p * Width1 + w1p)]);
      pos1++;
      pos2++;
    }
  } else {
    const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
    Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
    for (int c = 0; c < channels; ++c) {
      // Weighted sum of the 2x2 source neighborhood (planar layout).
      pos2[0] =
          h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
          h1lambda * (w0lambda * pos1[h1p * Width1] +
                      w1lambda * pos1[h1p * Width1 + w1p]);
      pos1 += Width1 * Height1;
      pos2 += Width2 * Height2;
    }
  }
}

// Host-side launcher for the forward bilinear interpolation kernel.
template <typename Dtype, bool packed>
void caffe_gpu_interp2(const int channels,
    const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
    Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
  // Validate the crop windows on the host before touching device memory.
  CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
  CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
  // Align-corners scale factors from output to input coordinates.
  const float scale_h = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
  const float scale_w = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
  const int num_threads = height2 * width2;  // one thread per output pixel
  caffe_gpu_interp2_kernel<Dtype,packed><<<CAFFE_GET_BLOCKS(num_threads), CAFFE_CUDA_NUM_THREADS>>>
      (num_threads, scale_h, scale_w, channels,
       data1, x1, y1, height1, width1, Height1, Width1,
       data2, x2, y2, height2, width2, Height2, Width2);
  CUDA_POST_KERNEL_CHECK;
}

// Backward (adjoint) operation 1 <- 2 (accumulates), one thread per
// output-gradient pixel. atomicAdd is required in the general case because
// neighboring output pixels can scatter into the same input pixel.
template <typename Dtype, bool packed>
__global__ void caffe_gpu_interp2_kernel_backward(const int n, const float rheight, const float rwidth,
    const int channels,
    Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
    const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
  const int index = threadIdx.x + blockIdx.x * blockDim.x;
  if (index >= n) return;               // guard against the trailing partial block
  const int w2 = index % width2;        // output column, 0:width2-1
  const int h2 = index / width2;        // output row, 0:height2-1
  // Fast path: same spatial size. Each destination element is touched by
  // exactly one thread, so plain accumulation suffices (no atomics).
  if (height1 == height2 && width1 == width2) {
    if (packed) {
      Dtype* dst = &data1[channels * ((y1 + h2) * Width1 + (x1 + w2))];
      const Dtype* src = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
      for (int c = 0; c < channels; ++c) {
        *dst++ += *src++;
      }
    } else {
      Dtype* dst = &data1[(y1 + h2) * Width1 + (x1 + w2)];
      const Dtype* src = &data2[(y2 + h2) * Width2 + (x2 + w2)];
      for (int c = 0; c < channels; ++c) {
        *dst += *src;
        dst += Width1 * Height1;
        src += Width2 * Height2;
      }
    }
    return;
  }
  // Bilinear weights, mirroring the forward kernel.
  const float h1r = rheight * h2;
  const int h1 = h1r;
  const int h1p = (h1 < height1 - 1) ? 1 : 0;
  const Dtype h1lambda = h1r - h1;
  const Dtype h0lambda = Dtype(1.) - h1lambda;
  const float w1r = rwidth * w2;
  const int w1 = w1r;
  const int w1p = (w1 < width1 - 1) ? 1 : 0;
  const Dtype w1lambda = w1r - w1;
  const Dtype w0lambda = Dtype(1.) - w1lambda;
  if (packed) {
    Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
    const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
    for (int c = 0; c < channels; ++c) {
      // Atomic scatter-add into the 2x2 source neighborhood.
      atomicAdd(&pos1[0], h0lambda * w0lambda * pos2[0]);
      atomicAdd(&pos1[channels * w1p], h0lambda * w1lambda * pos2[0]);
      atomicAdd(&pos1[channels * h1p * Width1], h1lambda * w0lambda * pos2[0]);
      atomicAdd(&pos1[channels * (h1p * Width1 + w1p)], h1lambda * w1lambda * pos2[0]);
      pos1++;
      pos2++;
    }
  } else {
    Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
    const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
    for (int c = 0; c < channels; ++c) {
      // Atomic scatter-add into the 2x2 source neighborhood.
      atomicAdd(&pos1[0], h0lambda * w0lambda * pos2[0]);
      atomicAdd(&pos1[w1p], h0lambda * w1lambda * pos2[0]);
      atomicAdd(&pos1[h1p * Width1], h1lambda * w0lambda * pos2[0]);
      atomicAdd(&pos1[h1p * Width1 + w1p], h1lambda * w1lambda * pos2[0]);
      pos1 += Width1 * Height1;
      pos2 += Width2 * Height2;
    }
  }
}

// Host-side launcher for the backward (adjoint) interpolation kernel.
// Note: data1 is accumulated into, so callers must zero it first.
template <typename Dtype, bool packed>
void caffe_gpu_interp2_backward(const int channels,
    Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
    const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
  // Validate the crop windows on the host before touching device memory.
  CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
  CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
  // Align-corners scale factors from output to input coordinates.
  const float scale_h = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
  const float scale_w = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
  const int num_threads = height2 * width2;  // one thread per output pixel
  caffe_gpu_interp2_kernel_backward<Dtype,packed><<<CAFFE_GET_BLOCKS(num_threads), CAFFE_CUDA_NUM_THREADS>>>
      (num_threads, scale_h, scale_w, channels,
       data1, x1, y1, height1, width1, Height1, Width1,
       data2, x2, y2, height2, width2, Height2, Width2);
  CUDA_POST_KERNEL_CHECK;
}


// Down-sampling kernel for one pyramid level: each output pixel is the
// mean of the corresponding 2x2 neighborhood in the input level.
// IN : [channels height width] (output buffer is pre-allocated).
template <typename Dtype, bool packed>
__global__ void caffe_gpu_pyramid2_kernel(const int n, const int channels,
    const Dtype *data1, const int height1, const int width1,
    Dtype *data2, const int height2, const int width2) {
  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
  if (tid >= n) {
    return;  // one thread per output pixel; surplus threads exit
  }
  const int w2 = tid % width2;  // output column, 0:width2-1
  const int h2 = tid / width2;  // output row, 0:height2-1
  const int w1 = 2 * w2;        // top-left corner of the 2x2 input patch
  const int h1 = 2 * h2;
  if (packed) {
    // Interleaved layout: the channels of one pixel are adjacent in memory.
    const Dtype* src = &data1[channels * (h1 * width1 + w1)];
    Dtype* dst = &data2[channels * (h2 * width2 + w2)];
    for (int c = 0; c < channels; ++c) {
      dst[0] = static_cast<Dtype>(.25) *
          (src[0] + src[channels] +
           src[channels * width1] + src[channels * (width1 + 1)]);
      ++src;
      ++dst;
    }
  } else {
    // Planar layout: successive channel planes are height*width apart.
    const Dtype* src = &data1[h1 * width1 + w1];
    Dtype* dst = &data2[h2 * width2 + w2];
    for (int c = 0; c < channels; ++c) {
      dst[0] = static_cast<Dtype>(.25) *
          (src[0] + src[1] +
           src[width1] + src[width1 + 1]);
      src += width1 * height1;
      dst += width2 * height2;
    }
  }
}

// Builds an image pyramid by repeatedly halving the resolution with 2x2
// box averaging. data_pyr must be pre-allocated to hold all levels, laid
// out back to back. Stops early once either dimension would reach zero.
template <typename Dtype, bool packed>
void caffe_gpu_pyramid2(const int channels,
    const Dtype *data, const int height, const int width,
    Dtype *data_pyr, const int levels) {
  CHECK(height > 0 && width > 0 && levels >= 0);
  int src_h = height;
  int src_w = width;
  const Dtype* src = data;
  Dtype* dst = data_pyr;
  for (int level = 0; level < levels; ++level) {
    const int dst_h = src_h / 2;
    const int dst_w = src_w / 2;
    if (dst_h == 0 || dst_w == 0) {
      break;  // image too small to halve again
    }
    const int thread_count = dst_h * dst_w;  // one thread per output pixel
    caffe_gpu_pyramid2_kernel<Dtype,packed><<<CAFFE_GET_BLOCKS(thread_count), CAFFE_CUDA_NUM_THREADS>>>
      (thread_count, channels, src, src_h, src_w, dst, dst_h, dst_w);
    CUDA_POST_KERNEL_CHECK;
    // The level just produced becomes the input of the next iteration.
    src = dst;
    src_h = dst_h;
    src_w = dst_w;
    dst += channels * dst_h * dst_w;
  }
}


// Explicit template instantiations for the float/double precisions Caffe
// uses, in both planar (packed=false) and interleaved (packed=true) layouts.
template void caffe_gpu_interp2<float,false>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<float,true>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<double,false>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<double,true>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);

// NOTE(review): the backward pass is only instantiated for the planar
// layout (packed=false); <Dtype,true> instances must be added here if a
// packed backward path is ever called.
template void caffe_gpu_interp2_backward<float,false>(const int, float *, const int, const int, const int, const int, const int, const int, const float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2_backward<double,false>(const int, double *, const int, const int, const int, const int, const int, const int, const double *, const int, const int, const int, const int, const int, const int);

template void caffe_gpu_pyramid2<float,false>(const int, const float *, const int, const int, float *, const int);
template void caffe_gpu_pyramid2<float,true>(const int, const float *, const int, const int, float *, const int);
template void caffe_gpu_pyramid2<double,false>(const int, const double *, const int, const int, double *, const int);
template void caffe_gpu_pyramid2<double,true>(const int, const double *, const int, const int, double *, const int);

} // namespace caffe
           

1.7 在caffe/src/caffe/proto修改 caffe.proto(注意是修改  不是建立!!)代碼如下:

message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;

  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  repeated ParamSpec param = 6;

  // The blobs containing the numeric parameters of the layer.
  repeated BlobProto blobs = 7;

  // Specifies on which bottoms the backpropagation should be skipped.
  // The size must be either 0 or equal to the number of bottoms.
  repeated bool propagate_down = 11;

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState.  You may specify a non-zero number of rules
  // to include OR exclude, but not both.  If no include or exclude rules are
  // specified, the layer is always included.  If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 8;
  repeated NetStateRule exclude = 9;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 100;

  // Parameters shared by loss layers.
  optional LossParameter loss_param = 101;

  // Layer type-specific parameters.
  //
  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.
  optional AccuracyParameter accuracy_param = 102;
  optional AdaptiveBiasChannelParameter adaptive_bias_channel_param = 148;
  optional ArgMaxParameter argmax_param = 103;
  optional BatchNormParameter batch_norm_param = 139;
  optional BNParameter bn_param = 152;
  optional BiasParameter bias_param = 141;
  optional BiasChannelParameter bias_channel_param = 149;
  optional ConcatParameter concat_param = 104;
  optional ContrastiveLossParameter contrastive_loss_param = 105;
  optional ConvolutionParameter convolution_param = 106;
  optional DataParameter data_param = 107;
  optional DenseCRFParameter dense_crf_param = 146;
  optional DomainTransformParameter domain_transform_param = 147;
  optional DropoutParameter dropout_param = 108;
  optional DummyDataParameter dummy_data_param = 109;
  optional EltwiseParameter eltwise_param = 110;
  optional ELUParameter elu_param = 140;
  optional EmbedParameter embed_param = 137;
  optional ExpParameter exp_param = 111;
  optional FlattenParameter flatten_param = 135;
  optional HDF5DataParameter hdf5_data_param = 112;
  optional HDF5OutputParameter hdf5_output_param = 113;
  optional HingeLossParameter hinge_loss_param = 114;
  optional ImageDataParameter image_data_param = 115;
  optional InfogainLossParameter infogain_loss_param = 116;
  optional InnerProductParameter inner_product_param = 117;
  // Field numbers must be unique within the message; 150 is already taken
  // by unique_label_param below, so the new Interp layer uses 153 (the next
  // free tag in this file). Pick any unused number in your own caffe.proto.
  optional InterpParameter interp_param = 153;
  optional LogParameter log_param = 134;
  optional LRNParameter lrn_param = 118;
  optional MatReadParameter mat_read_param = 151;
  optional MatWriteParameter mat_write_param = 145;
  optional MemoryDataParameter memory_data_param = 119;
  optional MVNParameter mvn_param = 120;
  optional PoolingParameter pooling_param = 121;
  optional PowerParameter power_param = 122;
  optional PReLUParameter prelu_param = 131;
  optional PythonParameter python_param = 130;
  optional ReductionParameter reduction_param = 136;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional ScaleParameter scale_param = 142;
  optional SegAccuracyParameter seg_accuracy_param = 144;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional UniqueLabelParameter unique_label_param = 150;
  optional WindowDataParameter window_data_param = 129;
}
           

然後再在後面定義這個 interp_param參數:

// Parameters for the Interp (bilinear resize) layer. Either give the output
// size explicitly (height/width) or derive it via zoom/shrink factors.
message InterpParameter {
  optional int32 height = 1 [default = 0]; // Height of output
  optional int32 width = 2 [default = 0]; // Width of output
  optional int32 zoom_factor = 3 [default = 1]; // zoom factor
  optional int32 shrink_factor = 4 [default = 1]; // shrink factor
  optional int32 pad_beg = 5 [default = 0]; // padding at begin of input
  optional int32 pad_end = 6 [default = 0]; // padding at end of input
}
           

第二步,重新編譯:

2.1  先重新編譯 caffe/src/caffe/proto/caffe.proto。

# 确定protobuf的版本
$ protoc --version
libprotoc 2.5.0

# 編譯caffe.proto,需要先進入caffe/src/caffe/proto目錄下,也可以不進入,指定路徑
$ protoc -I=./ --cpp_out=./ ./caffe.proto

# 檢視編譯結果
$ ls
caffe.pb.cc  caffe.pb.h  caffe.proto
           

2.2 重新編譯Caffe:

//確定每一步都成功執行
make clean
make -j8
make pycaffe
           

編譯成功之後就可以使用 interp 層了。

第三步 interp層的使用方法:

layer {
  bottom: "input"
  top: "output"
  name: "interp_layer"
  type: "Interp"
  # Protobuf text format (.prototxt) only supports '#' comments; '//' would
  # make the net definition unparsable.
  # interp_param is optional. To pin the output size instead of using
  # factors, write: interp_param { height: 60 width: 60 }
  interp_param {
    shrink_factor: 4
    zoom_factor: 3
    pad_beg: 0
    pad_end: 0
  }
}
           

繼續閱讀