While reproducing Residual Attention Network for Image Classification (CVPR 2017), I found that the Interp layer, which the paper uses for upsampling, is not defined in stock Caffe, so I pieced together my own Interp layer from material found online.
Step 1: define the custom Interp layer.
1.1 Create a file named interp_layer.hpp under caffe/include/caffe/layers with the following contents:
#ifndef CAFFE_INTERP_LAYER_HPP_
#define CAFFE_INTERP_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
/**
* @brief Changes the spatial resolution by bi-linear interpolation.
* The target size is specified in terms of pixels.
* The start and end pixels of the input are mapped to the start
* and end pixels of the output.
*/
template <typename Dtype>
class InterpLayer : public Layer<Dtype> {
public:
explicit InterpLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "Interp"; }
virtual inline int ExactNumBottomBlobs() const { return 1; } // the number of bottom blobs the Interp layer accepts; default is 1, adjust as needed (likewise for top blobs below)
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
int num_, channels_;
int height_in_, width_in_;
int height_out_, width_out_;
int pad_beg_, pad_end_;
int height_in_eff_, width_in_eff_;
};
} // namespace caffe
#endif  // CAFFE_INTERP_LAYER_HPP_
1.2 Create interp.hpp in the caffe/include/caffe/util folder with the following code:
#ifndef CAFFE_UTIL_INTERP_H_
#define CAFFE_UTIL_INTERP_H_
#include <cublas_v2.h>
#include "caffe/proto/caffe.pb.h"
namespace caffe {
// Bi-linear interpolation
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
template <typename Dtype, bool packed>
void caffe_cpu_interp2(const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);
template <typename Dtype, bool packed>
void caffe_gpu_interp2(const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);
// Backward (adjoint) operation
template <typename Dtype, bool packed>
void caffe_cpu_interp2_backward(const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);
template <typename Dtype, bool packed>
void caffe_gpu_interp2_backward(const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2);
// Create Gaussian pyramid of an image. Assume output space is pre-allocated.
// IN : [channels height width]
template <typename Dtype, bool packed>
void caffe_cpu_pyramid2(const int channels,
const Dtype *data, const int height, const int width,
Dtype *data_pyr, const int levels);
template <typename Dtype, bool packed>
void caffe_gpu_pyramid2(const int channels,
const Dtype *data, const int height, const int width,
Dtype *data_pyr, const int levels);
/*
template <typename Dtype, bool packed>
void caffe_cpu_mosaic(const int channels,
const Dtype *data1, const MosaicParameter mosaic_params1,
const Dtype *data_pyr, const int levels,
Dtype *data2, const MosaicParameter mosaic_params2);
template <typename Dtype, bool packed>
void caffe_gpu_mosaic(const int channels,
const Dtype *data1, const MosaicParameter mosaic_params1,
const Dtype *data_pyr, const int levels,
Dtype *data2, const MosaicParameter mosaic_params2);
*/
} // namespace caffe
#endif  // CAFFE_UTIL_INTERP_H_
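A note on the packed template flag: packed = true indexes the data as interleaved HWC (all channels of a pixel stored adjacently), while packed = false uses Caffe's usual planar CHW blob layout, striding by Height * Width between channels. The Interp layer itself only ever instantiates the packed = false variants.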
1.3 Create common.cuh in the caffe/include/caffe folder. It provides a double-precision atomicAdd fallback for GPUs with compute capability below 6.0, which the backward kernel in interp.cu needs. Code as follows:
#ifndef CAFFE_COMMON_CUH_
#define CAFFE_COMMON_CUH_
#include <cuda.h>
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 // this guard is required: CUDA already defines atomicAdd(double*, double) on sm_60+, and redefining it is a compile error (adjust to your server's CUDA setup)
#else
// CUDA: atomicAdd is not defined for doubles
static __inline__ __device__ double atomicAdd(double *address, double val) {
unsigned long long int* address_as_ull = (unsigned long long int*)address;
unsigned long long int old = *address_as_ull, assumed;
if (val==0.0)
return __longlong_as_double(old);
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val +__longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
#endif
#endif
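For context on why this header exists: caffe_gpu_interp2_kernel_backward in interp.cu (section 1.6) has many threads accumulating weighted gradients into overlapping input locations, so it calls atomicAdd on Dtype, and for Dtype = double on pre-sm_60 GPUs CUDA offers no native overload. A minimal toy program (hypothetical, not one of the files in this guide) showing the same accumulation pattern:
// toy_atomic.cu -- hypothetical illustration only.
#include <cstdio>
#include <cuda_runtime.h>
#include "caffe/common.cuh"  // supplies atomicAdd(double*, double) pre-sm_60

__global__ void accumulate(double* sum, const double* vals, const int n) {
  const int i = threadIdx.x + blockIdx.x * blockDim.x;
  // Many threads may hit the same address, just as the interp backward
  // kernel accumulates into overlapping input pixels; atomicAdd keeps
  // the sum race-free.
  if (i < n) atomicAdd(sum, vals[i]);
}

int main() {
  const int n = 1024;
  double *sum, *vals;
  cudaMallocManaged(&sum, sizeof(double));
  cudaMallocManaged(&vals, n * sizeof(double));
  *sum = 0.0;
  for (int i = 0; i < n; ++i) vals[i] = 1.0;
  accumulate<<<(n + 255) / 256, 256>>>(sum, vals, n);
  cudaDeviceSynchronize();
  printf("%f\n", *sum);  // expect 1024.000000
  cudaFree(vals);
  cudaFree(sum);
  return 0;
}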
1.4 Create interp_layer.cpp in the caffe/src/caffe/layers folder with the following code:
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/interp.hpp"
#include "caffe/layers/interp_layer.hpp"
namespace caffe {
template <typename Dtype>
void InterpLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
InterpParameter interp_param = this->layer_param_.interp_param();
pad_beg_ = interp_param.pad_beg();
pad_end_ = interp_param.pad_end();
CHECK_LE(pad_beg_, 0) << "Only supports non-pos padding (cropping) for now";
CHECK_LE(pad_end_, 0) << "Only supports non-pos padding (cropping) for now";
}
template <typename Dtype>
void InterpLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
num_ = bottom[0]->num();
channels_ = bottom[0]->channels();
height_in_ = bottom[0]->height();
width_in_ = bottom[0]->width();
height_in_eff_ = height_in_ + pad_beg_ + pad_end_;
width_in_eff_ = width_in_ + pad_beg_ + pad_end_;
InterpParameter interp_param = this->layer_param_.interp_param();
if (interp_param.has_shrink_factor() &&
!interp_param.has_zoom_factor()) {
const int shrink_factor = interp_param.shrink_factor();
CHECK_GE(shrink_factor, 1) << "Shrink factor must be positive";
height_out_ = (height_in_eff_ - 1) / shrink_factor + 1;
width_out_ = (width_in_eff_ - 1) / shrink_factor + 1;
} else if (interp_param.has_zoom_factor() &&
!interp_param.has_shrink_factor()) {
const int zoom_factor = interp_param.zoom_factor();
CHECK_GE(zoom_factor, 1) << "Zoom factor must be positive";
height_out_ = height_in_eff_ + (height_in_eff_ - 1) * (zoom_factor - 1);
width_out_ = width_in_eff_ + (width_in_eff_ - 1) * (zoom_factor - 1);
} else if (interp_param.has_height() && interp_param.has_width()) {
height_out_ = interp_param.height();
width_out_ = interp_param.width();
} else if (interp_param.has_shrink_factor() &&
interp_param.has_zoom_factor()) {
const int shrink_factor = interp_param.shrink_factor();
const int zoom_factor = interp_param.zoom_factor();
CHECK_GE(shrink_factor, 1) << "Shrink factor must be positive";
CHECK_GE(zoom_factor, 1) << "Zoom factor must be positive";
height_out_ = (height_in_eff_ - 1) / shrink_factor + 1;
width_out_ = (width_in_eff_ - 1) / shrink_factor + 1;
height_out_ = height_out_ + (height_out_ - 1) * (zoom_factor - 1);
width_out_ = width_out_ + (width_out_ - 1) * (zoom_factor - 1);
} else {
LOG(FATAL) << "Interp layer needs shrink_factor, zoom_factor, or height/width";
}
CHECK_GT(height_in_eff_, 0) << "height should be positive";
CHECK_GT(width_in_eff_, 0) << "width should be positive";
CHECK_GT(height_out_, 0) << "height should be positive";
CHECK_GT(width_out_, 0) << "width should be positive";
top[0]->Reshape(num_, channels_, height_out_, width_out_);
}
template <typename Dtype>
void InterpLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
caffe_cpu_interp2<Dtype,false>(num_ * channels_,
bottom[0]->cpu_data(), - pad_beg_, - pad_beg_, height_in_eff_, width_in_eff_, height_in_, width_in_,
top[0]->mutable_cpu_data(), 0, 0, height_out_, width_out_, height_out_, width_out_);
}
template <typename Dtype>
void InterpLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (!propagate_down[0]) { return; }
caffe_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_cpu_diff());
caffe_cpu_interp2_backward<Dtype,false>(num_ * channels_,
bottom[0]->mutable_cpu_diff(), - pad_beg_, - pad_beg_, height_in_eff_, width_in_eff_, height_in_, width_in_,
top[0]->cpu_diff(), 0, 0, height_out_, width_out_, height_out_, width_out_);
}
#ifndef CPU_ONLY
template <typename Dtype>
void InterpLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
caffe_gpu_interp2<Dtype,false>(num_ * channels_,
bottom[0]->gpu_data(), - pad_beg_, - pad_beg_, height_in_eff_, width_in_eff_, height_in_, width_in_,
top[0]->mutable_gpu_data(), 0, 0, height_out_, width_out_, height_out_, width_out_);
}
template <typename Dtype>
void InterpLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (!propagate_down[0]) { return; }
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
caffe_gpu_interp2_backward<Dtype,false>(num_ * channels_,
bottom[0]->mutable_gpu_diff(), - pad_beg_, - pad_beg_, height_in_eff_, width_in_eff_, height_in_, width_in_,
top[0]->gpu_diff(), 0, 0, height_out_, width_out_, height_out_, width_out_);
}
#endif
#ifdef CPU_ONLY
STUB_GPU(InterpLayer);
#endif
INSTANTIATE_CLASS(InterpLayer);
REGISTER_LAYER_CLASS(Interp);
} // namespace caffe
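To make the Reshape arithmetic above concrete, here is a small standalone sketch (a hypothetical helper, mirroring the shrink-then-zoom branch) of how an output dimension is derived:
#include <cassert>
#include <cstdio>

// Hypothetical helper mirroring InterpLayer<Dtype>::Reshape for the case
// where both shrink_factor and zoom_factor are set.
int interp_out_dim(const int in_dim, const int shrink_factor, const int zoom_factor) {
  assert(shrink_factor >= 1 && zoom_factor >= 1);
  int out = (in_dim - 1) / shrink_factor + 1;  // shrink first
  out += (out - 1) * (zoom_factor - 1);        // then zoom
  return out;
}

int main() {
  // e.g. a 65x65 input with shrink_factor: 4 and zoom_factor: 3
  // shrinks to 17x17, then zooms to 49x49.
  printf("%d\n", interp_out_dim(65, 4, 3));  // prints 49
  return 0;
}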
1.5 Create interp.cpp in the caffe/src/caffe/util folder with the following code:
#include "caffe/common.hpp"
#include "caffe/util/interp.hpp"
#include <algorithm>
#include <cmath>
namespace caffe {
// Bi-linear interpolation
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
template <typename Dtype, bool packed>
void caffe_cpu_interp2(const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
// special case: just copy
if (height1 == height2 && width1 == width2) {
for (int h2 = 0; h2 < height2; ++h2) {
const int h1 = h2;
for (int w2 = 0; w2 < width2; ++w2) {
const int w1 = w2;
if (packed) {
const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
return;
}
const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
for (int h2 = 0; h2 < height2; ++h2) {
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Dtype h1lambda = h1r - h1;
const Dtype h0lambda = Dtype(1.) - h1lambda;
for (int w2 = 0; w2 < width2; ++w2) {
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Dtype w1lambda = w1r - w1;
const Dtype w0lambda = Dtype(1.) - w1lambda;
if (packed) {
const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[channels * w1p]) +
h1lambda * (w0lambda * pos1[channels * h1p * Width1] + w1lambda * pos1[channels * (h1p * Width1 + w1p)]);
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
h1lambda * (w0lambda * pos1[h1p * Width1] + w1lambda * pos1[h1p * Width1 + w1p]);
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
}
// Backward (adjoint) operation 1 <- 2 (accumulates)
template <typename Dtype, bool packed>
void caffe_cpu_interp2_backward(const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
// special case: same-size matching grids
if (height1 == height2 && width1 == width2) {
for (int h2 = 0; h2 < height2; ++h2) {
const int h1 = h2;
for (int w2 = 0; w2 < width2; ++w2) {
const int w1 = w2;
if (packed) {
Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1++;
pos2++;
}
}
else {
Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
return;
}
const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
for (int h2 = 0; h2 < height2; ++h2) {
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Dtype h1lambda = h1r - h1;
const Dtype h0lambda = Dtype(1.) - h1lambda;
for (int w2 = 0; w2 < width2; ++w2) {
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Dtype w1lambda = w1r - w1;
const Dtype w0lambda = Dtype(1.) - w1lambda;
if (packed) {
Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos1[0] += h0lambda * w0lambda * pos2[0];
pos1[channels * w1p] += h0lambda * w1lambda * pos2[0];
pos1[channels * h1p * Width1] += h1lambda * w0lambda * pos2[0];
pos1[channels * (h1p * Width1 + w1p)] += h1lambda * w1lambda * pos2[0];
pos1++;
pos2++;
}
}
else {
Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos1[0] += h0lambda * w0lambda * pos2[0];
pos1[w1p] += h0lambda * w1lambda * pos2[0];
pos1[h1p * Width1] += h1lambda * w0lambda * pos2[0];
pos1[h1p * Width1 + w1p] += h1lambda * w1lambda * pos2[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
}
// Create Gaussian pyramid of an image. Assume output space is pre-allocated.
// IN : [channels height width]
template <typename Dtype, bool packed>
void caffe_cpu_pyramid2(const int channels,
const Dtype *data, const int height, const int width,
Dtype *data_pyr, const int levels) {
CHECK(height > 0 && width > 0 && levels >= 0);
int height1 = height, width1 = width;
int height2 = height, width2 = width;
const Dtype *data1 = data;
Dtype *data2 = data_pyr;
for (int l = 0; l < levels; ++l) {
height2 /= 2;
width2 /= 2;
if (height2 == 0 || width2 == 0) {
break;
}
for (int h2 = 0; h2 < height2; ++h2) {
const int h1 = 2 * h2;
for (int w2 = 0; w2 < width2; ++w2) {
const int w1 = 2 * w2;
if (packed) {
const Dtype* pos1 = &data1[channels * (h1 * width1 + w1)];
Dtype* pos2 = &data2[channels * (h2 * width2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] = static_cast<Dtype>(.25) *
(pos1[0] + pos1[channels] +
pos1[channels * width1] + pos1[channels * (width1 + 1)]);
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[h1 * width1 + w1];
Dtype* pos2 = &data2[h2 * width2 + w2];
for (int c = 0; c < channels; ++c) {
pos2[0] = static_cast<Dtype>(.25) *
(pos1[0] + pos1[1] +
pos1[width1] + pos1[width1 + 1]);
pos1 += width1 * height1;
pos2 += width2 * height2;
}
}
}
}
data1 = data2;
height1 = height2;
width1 = width2;
data2 += channels * height2 * width2;
}
}
/*
template <typename Dtype, bool packed>
void caffe_cpu_mosaic(const int channels,
const Dtype *data1, const MosaicParameter mosaic_params1,
const Dtype *data_pyr, const int levels,
Dtype *data2, const MosaicParameter mosaic_params2) {
const int num1 = mosaic_params1.rects_size();
const int num2 = mosaic_params2.rects_size();
CHECK(num1 == num2 || (num1 == 1 && num2 > 1) || (num2 == 1 && num1 > 1));
const int num = std::max(num1, num2);
for (int i = 0; i < num; ++i) {
const Rect rect1 = mosaic_params1.rects((i < num1) ? i : 0);
const Rect rect2 = mosaic_params2.rects((i < num2) ? i : 0);
int level = log2(sqrt((float)rect1.height() * rect1.width() / rect2.height() / rect2.width()));
level = std::max(0, std::min(levels, level));
if (data_pyr == 0 || level == 0) {
caffe_cpu_interp2<Dtype,packed>(channels,
data1, rect1.x(), rect1.y(), rect1.height(), rect1.width(), mosaic_params1.height(), mosaic_params1.width(),
data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
}
else {
const Dtype *data_pyr_l = data_pyr;
int factor = 2;
for (int l = 1; l < level; ++l) {
data_pyr_l += channels * (mosaic_params1.height() / factor) * (mosaic_params1.width() / factor);
factor *= 2;
}
caffe_cpu_interp2<Dtype,packed>(channels,
data_pyr_l, rect1.x() / factor, rect1.y() / factor, rect1.height() / factor, rect1.width() / factor, mosaic_params1.height() / factor, mosaic_params1.width() / factor,
data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
}
}
}
template <typename Dtype, bool packed>
void caffe_gpu_mosaic(const int channels,
const Dtype *data1, const MosaicParameter mosaic_params1,
const Dtype *data_pyr, const int levels,
Dtype *data2, const MosaicParameter mosaic_params2) {
const int num1 = mosaic_params1.rects_size();
const int num2 = mosaic_params2.rects_size();
CHECK(num1 == num2 || (num1 == 1 && num2 > 1) || (num2 == 1 && num1 > 1));
const int num = std::max(num1, num2);
for (int i = 0; i < num; ++i) {
const Rect rect1 = mosaic_params1.rects((i < num1) ? i : 0);
const Rect rect2 = mosaic_params2.rects((i < num2) ? i : 0);
int level = log2(sqrt((float)rect1.height() * rect1.width() / rect2.height() / rect2.width()));
level = std::max(0, std::min(levels, level));
if (data_pyr == 0 || level == 0) {
caffe_gpu_interp2<Dtype,packed>(channels,
data1, rect1.x(), rect1.y(), rect1.height(), rect1.width(), mosaic_params1.height(), mosaic_params1.width(),
data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
}
else {
const Dtype *data_pyr_l = data_pyr;
int factor = 2;
for (int l = 1; l < level; ++l) {
data_pyr_l += channels * (mosaic_params1.height() / factor) * (mosaic_params1.width() / factor);
factor *= 2;
}
caffe_gpu_interp2<Dtype,packed>(channels,
data_pyr_l, rect1.x() / factor, rect1.y() / factor, rect1.height() / factor, rect1.width() / factor, mosaic_params1.height() / factor, mosaic_params1.width() / factor,
data2, rect2.x(), rect2.y(), rect2.height(), rect2.width(), mosaic_params2.height(), mosaic_params2.width());
}
}
}
*/
// Explicit instances
template void caffe_cpu_interp2<float,false>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<float,true>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<double,false>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2<double,true>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2_backward<float,false>(const int, float *, const int, const int, const int, const int, const int, const int, const float *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_interp2_backward<double,false>(const int, double *, const int, const int, const int, const int, const int, const int, const double *, const int, const int, const int, const int, const int, const int);
template void caffe_cpu_pyramid2<float,false>(const int, const float *, const int, const int, float *, const int);
template void caffe_cpu_pyramid2<float,true>(const int, const float *, const int, const int, float *, const int);
template void caffe_cpu_pyramid2<double,false>(const int, const double *, const int, const int, double *, const int);
template void caffe_cpu_pyramid2<double,true>(const int, const double *, const int, const int, double *, const int);
/*
template void caffe_cpu_mosaic<float,false>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_cpu_mosaic<float,true>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_cpu_mosaic<double,false>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_cpu_mosaic<double,true>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_gpu_mosaic<float,false>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_gpu_mosaic<float,true>(const int, const float *, const MosaicParameter, const float *, const int, float *, const MosaicParameter);
template void caffe_gpu_mosaic<double,false>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
template void caffe_gpu_mosaic<double,true>(const int, const double *, const MosaicParameter, const double *, const int, double *, const MosaicParameter);
*/
} // namespace caffe
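Once Caffe is rebuilt with the files above, caffe_cpu_interp2 can be exercised directly as a sanity check. A minimal sketch (my own test program, compiled with the Caffe include path and linked against libcaffe) that upsamples a single-channel 2x2 grid to 4x4:
#include <cstdio>
#include "caffe/util/interp.hpp"

int main() {
  // One channel, no cropping: the sub-window equals the full image,
  // so x = y = 0, height == Height, and width == Width.
  const float in[4] = {0.f, 1.f,
                       2.f, 3.f};
  float out[16];
  caffe::caffe_cpu_interp2<float, false>(1,
      in,  0, 0, 2, 2, 2, 2,
      out, 0, 0, 4, 4, 4, 4);
  // The four input values land on the output corners
  // (out[0] == 0, out[3] == 1, out[12] == 2, out[15] == 3),
  // with bilinear blends in between.
  for (int h = 0; h < 4; ++h) {
    for (int w = 0; w < 4; ++w) printf("%.3f ", out[h * 4 + w]);
    printf("\n");
  }
  return 0;
}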
1.6 Create interp.cu in the caffe/src/caffe/util folder with the following code:
#include "caffe/common.hpp"
#include "caffe/common.cuh"
#include "caffe/util/interp.hpp"
namespace caffe {
// Bi-linear interpolation
// IN : [channels height1 width1] cropped from a bigger [Height1 Width1] image
// OUT: [channels height2 width2] cropped from a bigger [Height2 Width2] image
template <typename Dtype, bool packed>
__global__ void caffe_gpu_interp2_kernel(const int n, const float rheight, const float rwidth,
const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {
const int w2 = index % width2; // 0:width2-1
const int h2 = index / width2; // 0:height2-1
// special case: just copy
if (height1 == height2 && width1 == width2) {
const int h1 = h2;
const int w1 = w2;
if (packed) {
const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] = pos1[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
return;
}
//
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Dtype h1lambda = h1r - h1;
const Dtype h0lambda = Dtype(1.) - h1lambda;
//
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Dtype w1lambda = w1r - w1;
const Dtype w0lambda = Dtype(1.) - w1lambda;
//
if (packed) {
const Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[channels * w1p]) +
h1lambda * (w0lambda * pos1[channels * h1p * Width1] + w1lambda * pos1[channels * (h1p * Width1 + w1p)]);
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
h1lambda * (w0lambda * pos1[h1p * Width1] + w1lambda * pos1[h1p * Width1 + w1p]);
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
template <typename Dtype, bool packed>
void caffe_gpu_interp2(const int channels,
const Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
const int num_kernels = height2 * width2;
caffe_gpu_interp2_kernel<Dtype,packed><<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>
(num_kernels, rheight, rwidth, channels,
data1, x1, y1, height1, width1, Height1, Width1,
data2, x2, y2, height2, width2, Height2, Width2);
CUDA_POST_KERNEL_CHECK;
}
// Backward (adjoint) operation 1 <- 2 (accumulates)
template <typename Dtype, bool packed>
__global__ void caffe_gpu_interp2_kernel_backward(const int n, const float rheight, const float rwidth,
const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {
const int w2 = index % width2; // 0:width2-1
const int h2 = index / width2; // 0:height2-1
// special case: just copy
if (height1 == height2 && width1 == width2) {
const int h1 = h2;
const int w1 = w2;
if (packed) {
Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1++;
pos2++;
}
}
else {
Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
pos1[0] += pos2[0];
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
return;
}
//
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Dtype h1lambda = h1r - h1;
const Dtype h0lambda = Dtype(1.) - h1lambda;
//
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Dtype w1lambda = w1r - w1;
const Dtype w0lambda = Dtype(1.) - w1lambda;
//
if (packed) {
Dtype* pos1 = &data1[channels * ((y1 + h1) * Width1 + (x1 + w1))];
const Dtype* pos2 = &data2[channels * ((y2 + h2) * Width2 + (x2 + w2))];
for (int c = 0; c < channels; ++c) {
atomicAdd(&pos1[0], h0lambda * w0lambda * pos2[0]);
atomicAdd(&pos1[channels * w1p], h0lambda * w1lambda * pos2[0]);
atomicAdd(&pos1[channels * h1p * Width1], h1lambda * w0lambda * pos2[0]);
atomicAdd(&pos1[channels * (h1p * Width1 + w1p)], h1lambda * w1lambda * pos2[0]);
pos1++;
pos2++;
}
}
else {
Dtype* pos1 = &data1[(y1 + h1) * Width1 + (x1 + w1)];
const Dtype* pos2 = &data2[(y2 + h2) * Width2 + (x2 + w2)];
for (int c = 0; c < channels; ++c) {
atomicAdd(&pos1[0], h0lambda * w0lambda * pos2[0]);
atomicAdd(&pos1[w1p], h0lambda * w1lambda * pos2[0]);
atomicAdd(&pos1[h1p * Width1], h1lambda * w0lambda * pos2[0]);
atomicAdd(&pos1[h1p * Width1 + w1p], h1lambda * w1lambda * pos2[0]);
pos1 += Width1 * Height1;
pos2 += Width2 * Height2;
}
}
}
}
template <typename Dtype, bool packed>
void caffe_gpu_interp2_backward(const int channels,
Dtype *data1, const int x1, const int y1, const int height1, const int width1, const int Height1, const int Width1,
const Dtype *data2, const int x2, const int y2, const int height2, const int width2, const int Height2, const int Width2) {
CHECK(x1 >= 0 && y1 >= 0 && height1 > 0 && width1 > 0 && x2 >= 0 && y2 >= 0 && height2 > 0 && width2 > 0);
CHECK(Width1 >= width1 + x1 && Height1 >= height1 + y1 && Width2 >= width2 + x2 && Height2 >= height2 + y2);
const float rheight = (height2 > 1) ? static_cast<float>(height1 - 1) / (height2 - 1) : 0.f;
const float rwidth = (width2 > 1) ? static_cast<float>(width1 - 1) / (width2 - 1) : 0.f;
const int num_kernels = height2 * width2;
caffe_gpu_interp2_kernel_backward<Dtype,packed><<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>
(num_kernels, rheight, rwidth, channels,
data1, x1, y1, height1, width1, Height1, Width1,
data2, x2, y2, height2, width2, Height2, Width2);
CUDA_POST_KERNEL_CHECK;
}
// Create Gaussian pyramid of an image. Assume output space is pre-allocated.
// IN : [channels height width]
template <typename Dtype, bool packed>
__global__ void caffe_gpu_pyramid2_kernel(const int n, const int channels,
const Dtype *data1, const int height1, const int width1,
Dtype *data2, const int height2, const int width2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {
const int w2 = index % width2; // 0:width2-1
const int h2 = index / width2; // 0:height2-1
const int w1 = 2 * w2;
const int h1 = 2 * h2;
if (packed) {
const Dtype* pos1 = &data1[channels * (h1 * width1 + w1)];
Dtype* pos2 = &data2[channels * (h2 * width2 + w2)];
for (int c = 0; c < channels; ++c) {
pos2[0] = static_cast<Dtype>(.25) *
(pos1[0] + pos1[channels] +
pos1[channels * width1] + pos1[channels * (width1 + 1)]);
pos1++;
pos2++;
}
}
else {
const Dtype* pos1 = &data1[h1 * width1 + w1];
Dtype* pos2 = &data2[h2 * width2 + w2];
for (int c = 0; c < channels; ++c) {
pos2[0] = static_cast<Dtype>(.25) *
(pos1[0] + pos1[1] +
pos1[width1] + pos1[width1 + 1]);
pos1 += width1 * height1;
pos2 += width2 * height2;
}
}
}
}
template <typename Dtype, bool packed>
void caffe_gpu_pyramid2(const int channels,
const Dtype *data, const int height, const int width,
Dtype *data_pyr, const int levels) {
CHECK(height > 0 && width > 0 && levels >= 0);
int height1 = height, width1 = width;
int height2 = height, width2 = width;
const Dtype *data1 = data;
Dtype *data2 = data_pyr;
for (int l = 0; l < levels; ++l) {
height2 /= 2;
width2 /= 2;
if (height2 == 0 || width2 == 0) {
break;
}
const int num_kernels = height2 * width2;
caffe_gpu_pyramid2_kernel<Dtype,packed><<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>
(num_kernels, channels, data1, height1, width1, data2, height2, width2);
CUDA_POST_KERNEL_CHECK;
data1 = data2;
height1 = height2;
width1 = width2;
data2 += channels * height2 * width2;
}
}
// Explicit instances
template void caffe_gpu_interp2<float,false>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<float,true>(const int, const float *, const int, const int, const int, const int, const int, const int, float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<double,false>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2<double,true>(const int, const double *, const int, const int, const int, const int, const int, const int, double *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2_backward<float,false>(const int, float *, const int, const int, const int, const int, const int, const int, const float *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_interp2_backward<double,false>(const int, double *, const int, const int, const int, const int, const int, const int, const double *, const int, const int, const int, const int, const int, const int);
template void caffe_gpu_pyramid2<float,false>(const int, const float *, const int, const int, float *, const int);
template void caffe_gpu_pyramid2<float,true>(const int, const float *, const int, const int, float *, const int);
template void caffe_gpu_pyramid2<double,false>(const int, const double *, const int, const int, double *, const int);
template void caffe_gpu_pyramid2<double,true>(const int, const double *, const int, const int, double *, const int);
} // namespace caffe
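Note the launch geometry: each kernel runs one thread per output spatial location (num_kernels = height2 * width2), and every thread then loops over all planes. This is why InterpLayer passes num_ * channels_ as the channels argument: the batch and channel dimensions are folded together and walked inside the per-pixel loop.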
1.7 Modify caffe.proto under caffe/src/caffe/proto (note: you are editing the existing file, not creating a new one!). Add an interp_param entry to the LayerParameter message so that it looks like this:
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob
// The train / test phase for computation.
optional Phase phase = 10;
// The amount of weight to assign each top blob in the objective.
// Each layer assigns a default value, usually of either 0 or 1,
// to each top blob.
repeated float loss_weight = 5;
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
repeated ParamSpec param = 6;
// The blobs containing the numeric parameters of the layer.
repeated BlobProto blobs = 7;
// Specifies on which bottoms the backpropagation should be skipped.
// The size must be either 0 or equal to the number of bottoms.
repeated bool propagate_down = 11;
// Rules controlling whether and when a layer is included in the network,
// based on the current NetState. You may specify a non-zero number of rules
// to include OR exclude, but not both. If no include or exclude rules are
// specified, the layer is always included. If the current NetState meets
// ANY (i.e., one or more) of the specified rules, the layer is
// included/excluded.
repeated NetStateRule include = 8;
repeated NetStateRule exclude = 9;
// Parameters for data pre-processing.
optional TransformationParameter transform_param = 100;
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;
// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
// for their implementation. These layers include an Engine type and
// engine parameter for selecting the implementation.
// The default for the engine is set by the ENGINE switch at compile-time.
optional AccuracyParameter accuracy_param = 102;
optional AdaptiveBiasChannelParameter adaptive_bias_channel_param = 148;
optional ArgMaxParameter argmax_param = 103;
optional BatchNormParameter batch_norm_param = 139;
optional BNParameter bn_param = 152;
optional BiasParameter bias_param = 141;
optional BiasChannelParameter bias_channel_param = 149;
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
optional DataParameter data_param = 107;
optional DenseCRFParameter dense_crf_param = 146;
optional DomainTransformParameter domain_transform_param = 147;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
optional ELUParameter elu_param = 140;
optional EmbedParameter embed_param = 137;
optional ExpParameter exp_param = 111;
optional FlattenParameter flatten_param = 135;
optional HDF5DataParameter hdf5_data_param = 112;
optional HDF5OutputParameter hdf5_output_param = 113;
optional HingeLossParameter hinge_loss_param = 114;
optional ImageDataParameter image_data_param = 115;
optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117;
optional InterpParameter interp_param = 143; // the field number must be unique within LayerParameter; 150 is already taken by unique_label_param below, so an unused tag such as 143 is used here -- pick whatever is free in your own proto
optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118;
optional MatReadParameter mat_read_param = 151;
optional MatWriteParameter mat_write_param = 145;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional ScaleParameter scale_param = 142;
optional SegAccuracyParameter seg_accuracy_param = 144;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
optional UniqueLabelParameter unique_label_param = 150;
optional WindowDataParameter window_data_param = 129;
}
Then define the InterpParameter message itself further down in the same file:
message InterpParameter {
optional int32 height = 1 [default = 0]; // Height of output
optional int32 width = 2 [default = 0]; // Width of output
optional int32 zoom_factor = 3 [default = 1]; // zoom factor
optional int32 shrink_factor = 4 [default = 1]; // shrink factor
optional int32 pad_beg = 5 [default = 0]; // padding at begin of input
optional int32 pad_end = 6 [default = 0]; // padding at end of input
}
Step 2: recompile.
2.1 First recompile caffe/src/caffe/proto/caffe.proto:
# check the protobuf version
$ protoc --version
libprotoc 2.5.0
# compile caffe.proto; either cd into caffe/src/caffe/proto first, or pass explicit paths
$ protoc -I=./ --cpp_out=./ ./caffe.proto
# inspect the result
$ ls
caffe.pb.cc caffe.pb.h caffe.proto
2.2 Rebuild Caffe:
# make sure each step succeeds before running the next
make clean
make -j8
make pycaffe
Once the build succeeds, the Interp layer is ready to use.
Step 3: using the Interp layer. A prototxt example:
layer {
  bottom: "input"
  top: "output"
  name: "interp_layer"
  type: "Interp"
  # To pin the feature map to a fixed size instead, use
  # interp_param { height: 60 width: 60 }. Note that at least one of
  # shrink_factor, zoom_factor, or height/width must be set, otherwise
  # Reshape hits LOG(FATAL).
  interp_param {
    shrink_factor: 4
    zoom_factor: 3
    pad_beg: 0
    pad_end: 0
  }
}
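With these settings, a 65x65 bottom blob, for example, first shrinks to (65 - 1)/4 + 1 = 17 and then zooms to 17 + 16 * (3 - 1) = 49, so the top blob comes out 49x49; pad_beg and pad_end of 0 leave the input uncropped.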