最近翻了翻以前做的一些筆記，碰巧翻到了2019年剛開始學習OpenCV時候做的筆記，不知不覺已經過去兩年了，這兩年從一個小白到現在不是太小白的小白o(╥﹏╥)o，在此分享一下，希望能幫助到更多的人。

相關視訊：https://www.bilibili.com/video/BV1FJ411T7W5?p=2

文章目錄

DNN子產品
- Googlenet模型實作圖像分類
- - 介紹：
  - 代碼：
  - 結果展示：
- SSD模型實作對象檢測
- - 介紹：
  - 代碼：
  - 結果展示：
- MobileNetSSD模型實時對象檢測
- - 介紹：
  - 代碼：
  - 結果展示：
- FCN模型圖像分割
- - 介紹：
  - 代碼：
  - 結果展示：
- CNN預測年齡和性别
- - 介紹：
  - 代碼1：
  - 結果1展示：
  - 代碼2：
  - 結果2展示：
- GOTURN模型實作對象跟蹤
- - 介紹：
  - - 算法架構
    - 輸入輸出
  - 代碼：
  - 結果展示：

DNN子產品

Googlenet模型實作圖像分類

介紹：

論文：https://github.com/SnailTyan/deep-learning-papers-translation

這裡有很多翻譯好的論文，很友善。

所需檔案：二進制模型檔案，模型參數描述檔案，分類label檔案。

模型下載下傳:

http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

卷積層提取特征，全連接配接層進行分類。

描述檔案:bvlc_googlenet.prototxt

這個在opencv的源碼裡邊有opencv-3.3.1\samples\data\dnn

模型輸出為一個1000維的向量，代表1000個分類的機率。

代碼：

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
using namespace cv;
using namespace std;
using namespace cv::dnn;
String model_bin_file = "model/bvlc_googlenet.caffemodel";
String model_txt_file = "model/bvlc_googlenet.prototxt";
String labels_txt_file = "model/synset_words.txt";
vector<String> readLabels();
int main(int argc, char** argv)
{
	Mat src = imread("pictures/girl.jpg");
	if (src.empty())
	{
		cout << "could not open image……" << endl;
		return -1;
	}
	namedWindow("src", WINDOW_FREERATIO);
	imshow("src", src);
	// 讀取labels
	vector<String> labels = readLabels();
	// 讀取網絡 包括模型描述檔案和和模型檔案
	Net net = readNetFromCaffe(model_txt_file, model_bin_file);
	if (net.empty())
	{
		cout << "net could not load……" << endl;
		return -1;
	}
	Mat inputBlob = blobFromImage(src, 1.0, Size(224, 224), Scalar(104, 117, 123));
	Mat prob;
	for (size_t i = 0; i < 10; i++)
	{
		net.setInput(inputBlob, "data");
		prob = net.forward("prob");	// 輸出為1×1000 1000類的機率
	}
	Mat proMat = prob.reshape(1, 1);	// 單通道 一行
	Point classNumber;
	double classProb;
	minMaxLoc(proMat, NULL, &classProb, NULL, &classNumber);
	int classidx = classNumber.x;
	cout << "current image classification:" << labels.at(classidx).c_str() 
		 << "possible:" << classProb <<  endl;
	putText(src, labels.at(classidx), Point(20, 20), FONT_HERSHEY_PLAIN, 1.5, Scalar(0, 0, 255), 1, 8);
	imshow("image", src);
	waitKey(0);
	return 0;
}
vector<String> readLabels()
{
	vector<String> classNames;
	ifstream fin(labels_txt_file.c_str());
	if (!fin.is_open())
	{
		cout << "could not open the file……" << endl;
		exit(-1);
	}
	string name;
	while (!fin.eof())
	{
		getline(fin, name);
		if (name.length())
		{
			classNames.push_back(name.substr(name.find(" " + 1)));// 按空格的位置往後移一位進行分割

		}
	}
	fin.close();
	return classNames;
}

結果展示：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

SSD模型實作對象檢測

介紹：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

模型下載下傳：

https://github.com/weiliu89/caffe/tree/ssd#models

結構：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

比傳統的R-CNN要好很多。把兩步和為一步，幀率得到了提高。

模型檔案：還是有三個二進制模型檔案，模型參數描述檔案，分類label檔案

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

模型輸出為一個7維向量後四維為檢測出來目标框的矩形坐标倒數第5維為置信度

代碼：

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>

using namespace std;
using namespace cv;
using namespace cv::dnn;

const size_t width = 300;
const size_t height = 300;
String labelFile = "model\\models_VGGNet_ILSVRC2016_SSD_300x300\\models\\VGGNet\\ILSVRC2016\\SSD_300x300\\labelmap_ilsvrc_det.prototxt";
String modelFile = "model\\models_VGGNet_ILSVRC2016_SSD_300x300\\models\\VGGNet\\ILSVRC2016\\SSD_300x300\\VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel";
String model_text_file = "model\\models_VGGNet_ILSVRC2016_SSD_300x300\\models\\VGGNet\\ILSVRC2016\\SSD_300x300\\deploy.prototxt";
const int meanValues[3] = { 104, 117, 123 };

vector<String> readLabels();
static Mat getMean(const size_t &w, const size_t &h);
static Mat preprocess(const Mat& frame);
int main(int argc, char** argv)
{
	Mat frame = imread("pictures/cat.jpg");
	if (frame.empty())
	{
		cout << "could not open image……" << endl;
		return -1;
	}
	namedWindow("input image", WINDOW_FREERATIO);
	imshow("input image", frame);

	vector<String> objNames = readLabels();
	// import Caffe SSD model
	Net net = readNetFromCaffe(model_text_file, modelFile);
	if (net.empty())
	{
		cout << "read caffe model data failure..." << endl;
		return -1;
	}

	Mat input_image = preprocess(frame);
	Mat blobImage = blobFromImage(input_image);

	net.setInput(blobImage, "data");
	Mat detection = net.forward("detection_out");
	Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
	float confidence_threshold = 0.1;
	for (int i = 0; i < detectionMat.rows; i++)
	{
        // 輸出為一個7維向量 後四維為檢測出來目标框的矩形坐标 倒數第5維為置信度
		float confidence = detectionMat.at<float>(i, 2);
		if (confidence > confidence_threshold)
		{
			size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
			float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
			float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
			float br_x = detectionMat.at<float>(i, 5) * frame.cols;
			float br_y = detectionMat.at<float>(i, 6) * frame.rows;

			Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
			rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
			putText(frame, format("%s", objNames[objIndex].c_str()), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
		}
	}
	imshow("ssd-demo", frame);

	waitKey(0);
	return 0;
}

vector<String> readLabels()
{
	vector<String> objNames;
	ifstream fin(labelFile);
	if (!fin.is_open())
	{
		cout << "could not load labeFile……" << endl;
		exit(-1);
	}
	string name;
	while (!fin.eof())
	{
		getline(fin, name);
		if (name.length() && (name.find("display_name:") == 2))
		{
			string temp = name.substr(17);
			temp.replace(temp.end() - 1, temp.end(), "");
			objNames.push_back(temp);
		}
	}
	return objNames;
}

Mat getMean(const size_t& w, const size_t& h)
{
	Mat mean;
	vector<Mat> channels;
	for (size_t i = 0; i < 3; i++)
	{
		Mat channel(h, w, CV_32F, Scalar(meanValues[i]));
		channels.push_back(channel);
	}
	merge(channels, mean);
	return mean;
}

Mat preprocess(const Mat& frame)
{
	Mat preprocessed;
	frame.convertTo(preprocessed, CV_32F);
	resize(preprocessed, preprocessed, Size(width, height));	// 300*300 image
	Mat mean = getMean(width, height);
	subtract(preprocessed, mean, preprocessed);
	return preprocessed;
}

結果展示：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

MobileNetSSD模型實時對象檢測

介紹：

對SSD模型進行了簡化，從1000個分類縮減為20個。

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

還是模型二進制檔案，模型描述檔案，label檔案。

模型下載下傳位址：https://github.com/PINTO0309/MobileNet-SSD-RealSense/blob/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.caffemodel

注意要使用deploy版本的。

模型輸出也為一個7維向量後四維為檢測出來目标框的矩形坐标倒數第5維為置信度

代碼：

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>

using namespace std;
using namespace cv;
using namespace cv::dnn;

const size_t width = 300;
const size_t height = 300;
// 下面這兩個參數是官方的參數
const float meanVal = 127.5;
const float scaleFactor = 0.0078;
String labelFile = "model/mobileNetSSD/pascal-classes.txt";
String modelFile = "model/mobileNetSSD/MobileNetSSD_deploy.caffemodel";
String model_text_file = "model/mobileNetSSD/MobileNetSSD_deploy.prototxt";

vector<String> readLabels();
int main(int argc, char** argv)
{
	VideoCapture capture;
	capture.open("pictures/vtest.avi");
	namedWindow("input", CV_WINDOW_FREERATIO);
	namedWindow("ssd-video-demo", CV_WINDOW_FREERATIO);
	int w = capture.get(CAP_PROP_FRAME_WIDTH);
	int h = capture.get(CAP_PROP_FRAME_HEIGHT);
	printf("frame width:%d, frame height:%d\n", w, h);

	// set up net
	Net net = readNetFromCaffe(model_text_file, modelFile);
	if (net.empty())
	{
		cout << "could not load NetModel……" << endl;
		return -1;
	}

	// read the label
	vector<String> classNames = readLabels();
	Mat frame;
	int i = 0;
	while (capture.read(frame))
	{
		i++;
		imshow("input", frame);
		// 預測
		double t1 = (double)getTickCount();
		Mat inputblob = blobFromImage(frame, scaleFactor, Size(width, height), meanVal, false);
		net.setInput(inputblob, "data");
		Mat detection = net.forward("detection_out");
		double t2 = (double)getTickCount();
		cout << "第" << i << "幀" << "耗費時間：" << (t2 - t1) / getTickFrequency() << "s\n" << endl;
		// 繪制
		Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
		float confidence_threshold = 0.25;
		for (int i = 0; i < detectionMat.rows; i++) {
			float confidence = detectionMat.at<float>(i, 2);
			if (confidence > confidence_threshold) {
				size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
				float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
				float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
				float br_x = detectionMat.at<float>(i, 5) * frame.cols;
				float br_y = detectionMat.at<float>(i, 6) * frame.rows;

				Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
				rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
				//putText(frame, format("%s", classNames[objIndex]), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
				putText(frame, classNames[objIndex], Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
			}
		}
		imshow("ssd-video-demo", frame);
		char c = waitKey(50);
		if (c == 27) // ESC
		{
			break;
		}
	}
	waitKey(0);
	return 0;
}

vector<String> readLabels()
{
	vector<String> objNames;
	ifstream fin(labelFile);
	if (!fin.is_open())
	{
		cout << "could not load labeFile……" << endl;
		exit(-1);
	}
	string name;
	while (!fin.eof())
	{
		getline(fin, name);
		if (name.length())
		{
			string temp = name.substr(0, name.find(" ", 0));
			objNames.push_back(temp);
		}
	}
	return objNames;
}

結果展示：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

FCN模型圖像分割

介紹：

論文：https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf

全卷積網絡

模型與資料：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

還是三個檔案：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

模型下載下傳位址：https://github.com/shelhamer/fcn.berkeleyvision.org

模型輸出為21×500×500的數組。21為channel，也就是類别。500×500為rows×cols，對應于圖檔中的每一個像素值。

代碼：

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <string.h>
#include <stdio.h>
using namespace std;
using namespace cv;
using namespace cv::dnn;

const size_t width = 500;
const size_t height = 500;
String labelFile = "model\\FCN\\pascal-classes.txt";
String modelFile = "model\\FCN\\fcn8s-heavy-pascal.caffemodel";
String model_text_file = "model\\FCN\\fcn8s-heavy-pascal.prototxt";
Scalar meanValues = Scalar(104, 117, 123);

vector<Vec3b> readColors();
vector<String> readLabels();

int main(int argc, char** argv)
{
	Mat frame = imread("pictures/rgb.jpg");
	//Mat frame = imread("E:/Dataset/Flange/picture_sample/水漬and砂眼/test2.jpg");
	Mat img_gray;
	cvtColor(frame, img_gray, COLOR_BGR2GRAY);
	if (frame.empty())
	{
		cout << "could not open image……" << endl;
		return -1;
	}
	namedWindow("input image", WINDOW_FREERATIO);
	imshow("input image", frame);
	resize(frame, frame, Size(500, 500));
	vector<Vec3b> colors = readColors();

	// import Caffe SSD model
	Net net = readNetFromCaffe(model_text_file, modelFile);
	if (net.empty())
	{
		cout << "read caffe model data failure..." << endl;
		return -1;
	}

	Mat blobImage = blobFromImage(frame);

	// 預測
	net.setInput(blobImage, "data");
	Mat score = net.forward("score");

	// 分割并顯示
	const int rows = score.size[2];
	const int cols = score.size[3];
	const int chns = score.size[1];
	Mat maxCl(rows, cols, CV_8UC1);	// 該像素處機率最大的那個channel 類别
	Mat maxVal(rows, cols, CV_32FC1);	// 該像素處機率最大的那個channel所對應的的機率值 該類别所對應的機率	這個值下邊其實沒用到

	// setup LUT
	for (int c = 0; c < chns; c++)
	{
		for (int row = 0; row < rows; row++)
		{
			const float* ptrScore = score.ptr<float>(0, c, row);
			
			uchar* ptrMaxCl = maxCl.ptr<uchar>(row);
			float* ptrMaxVal = maxVal.ptr<float>(row);

			for (int col = 0; col < cols; col++)
			{
				if (ptrScore[col] > ptrMaxVal[col])
				{
					ptrMaxVal[col] = ptrScore[col];	// 機率
					ptrMaxCl[col] = (uchar)c;	// 類别
				}
			}
		}
	}

	// look up colors
	Mat result = Mat::zeros(rows, cols, CV_8UC3);
	for (int row = 0; row < rows; row++) {
		const uchar* ptrMaxCl = maxCl.ptr<uchar>(row);
		Vec3b* ptrColor = result.ptr<Vec3b>(row);
		for (int col = 0; col < cols; col++)
		{
			ptrColor[col] = colors[ptrMaxCl[col]];	// 取出每一個像素類别所對應的顔色 共21類
		}
	}
	Mat dst;
	addWeighted(frame, 0.3, result, 0.7, 0, dst);
	imshow("FCN-demo", dst);

	waitKey(0);
	return 0;
}

vector<Vec3b> readColors()
{
	vector<Vec3b> objColors;
	ifstream fin(labelFile);
	if (!fin.is_open())
	{
		cout << "could not load labeFile……" << endl;
		exit(-1);
	}
	string line;
	while (!fin.eof())
	{
		getline(fin, line);
		if (line.length())
		{
			//string temp = color.substr(color.find(" ") + 1);
			stringstream ss(line);
			string name;
			int temp;
			Vec3b color;
			ss >> name;
			ss >> temp;
			color[0] = (uchar)temp;
			ss >> temp;
			color[1] = (uchar)temp;
			ss >> temp;
			color[2] = (uchar)temp;
			objColors.push_back(color);
		}
	}
	return objColors;
}

vector<String> readLabels()
{
	vector<String> objNames;
	ifstream fin(labelFile);
	if (!fin.is_open())
	{
		cout << "could not load labeFile……" << endl;
		exit(-1);
	}
	string name;
	while (!fin.eof())
	{
		getline(fin, name);
		if (name.length() && (name.find("display_name:") == 2))
		{
			string temp = name.substr(17);
			temp.replace(temp.end() - 1, temp.end(), "");
			objNames.push_back(temp);
		}
	}
	return objNames;
}

結果展示：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

CNN預測年齡和性别

介紹：

論文：https://talhassner.github.io/home/projects/cnn_agegender/CVPR2015_CNN_AgeGenderEstimation.pdf

模型以及描述檔案下載下傳：

https://talhassner.github.io/home/publication/2015_CVPR

使用模型的方式與之前的差不多，我自己寫了一個，但是感覺年齡識别結果相當不準。

代碼1：

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <iostream>
#include <fstream>

using namespace std;
using namespace cv;
using namespace cv::dnn;

string age_labels[] = { "0-2", "4-6", "8-13", "15-20", "25-32", "38-43", "48-53", "60-"};
string age_model_file = "model/ageClassication/age_net.caffemodel";
string age_model_prototxt = "model/ageClassication/deploy_age.prototxt";

string gender_labels[] = { "man", "woman"};
string gender_model_file = "model/genderClassication/gender_net.caffemodel";
string gender_model_prototxt = "model/genderClassication/deploy_gender.prototxt";
int main(int argc, char** argv)
{
	system("color 0A");
	// 加載圖檔
	Mat img = imread("pictures/boy.jpg");
	if (img.empty())
	{
		cout << "could not load img……" << endl;
		return -1;
	}
	namedWindow("input", CV_WINDOW_AUTOSIZE);
	imshow("input", img);

	// 加載網絡模型
	Net age_net = readNetFromCaffe(age_model_prototxt, age_model_file);
	if (age_net.empty())
	{
		cout << "could not load Net age_model……" << endl;
		exit(-1);
	}

	Net gender_net = readNetFromCaffe(gender_model_prototxt, gender_model_file);
	if (gender_net.empty())
	{
		cout << "could not load Net gender_model……" << endl;
		exit(-1);
	}

	// 預測
	Mat input = blobFromImage(img, 1.0, Size(227, 227));

	age_net.setInput(input, "data");
	Mat age_prob = age_net.forward("prob");

	gender_net.setInput(input, "data");
	Mat gender_prob = gender_net.forward("prob");

	// 在圖像上表示結果
	Point age_class_Number;
	double age_class_Prob;
	Mat age_probMat = age_prob.reshape(1, 1);
	minMaxLoc(age_probMat, NULL, &age_class_Prob, NULL, &age_class_Number);
	int age_index = age_class_Number.x;
	cout << "對象年齡為：" << age_labels[age_index] << endl;
	cout << "機率為：" << age_class_Prob << endl;
	
	Point gender_class_Number;
	double gender_class_Prob;
	Mat gender_probMat = gender_prob.reshape(1, 1);
	minMaxLoc(gender_prob, NULL, &gender_class_Prob, NULL, &gender_class_Number);
	int gender_index = gender_class_Number.x;
	cout << "對象性别為：" << gender_labels[gender_index] << endl;
	cout << "機率為：" << gender_class_Prob << endl;

	putText(img, "age:" + age_labels[age_index], Point(20, 20), FONT_HERSHEY_PLAIN, 1.5, Scalar(0, 0, 255), 1, 8);
	putText(img, "gender:" + gender_labels[gender_index], Point(20, 40), FONT_HERSHEY_PLAIN, 1.5, Scalar(0, 255, 0), 1, 8);

	namedWindow("results", CV_WINDOW_AUTOSIZE);
	imshow("results", img);



	waitKey(0);
	return 0;
}

結果1展示：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

把小孩識别成38-43歲……

視訊裡邊用了一個檔案haarcascade_frontalface_alt_tree.xml，先把人臉部分提取出來了：

主要使用了一個多尺度檢測的函數detectMultiScale()，得到人臉所在的矩形區域，能夠檢測出來一張圖檔中的多張人臉。

然後直接把人臉部分輸入，其他地方和上面的差不多。

代碼2：

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;
String haar_file = "D:/opencv/build/etc/haarcascades/haarcascade_frontalface_alt_tree.xml";
String age_model = "model/ageClassication/age_net.caffemodel";
String age_text = "model/ageClassication/deploy_age.prototxt";

String gender_model = "model/genderClassication/gender_net.caffemodel";
String gender_text = "model/genderClassication/deploy_gender.prototxt";

void predict_age(Net& net, Mat image);
void predict_gender(Net& net, Mat image);
int main(int argc, char** argv) {
	Mat src = imread("pictures/mutiFace1.jpg");
	if (src.empty()) {
		printf("could not load image...\n");
		return -1;
	}
	namedWindow("input", CV_WINDOW_AUTOSIZE);
	imshow("input", src);
	// 檢測人臉區域
	CascadeClassifier detector;
	detector.load(haar_file);
	vector<Rect> faces;
	Mat gray;
	cvtColor(src, gray, COLOR_BGR2GRAY);
	detector.detectMultiScale(gray, faces, 1.02, 1, 0, Size(40, 40), Size(1000, 1000));

	// 加載網絡模型
	Net age_net = readNetFromCaffe(age_text, age_model);
	Net gender_net = readNetFromCaffe(gender_text, gender_model);

	for (size_t t = 0; t < faces.size(); t++) {
		rectangle(src, faces[t], Scalar(30, 255, 30), 2, 8, 0);
		predict_age(age_net, src(faces[t]));	// 将人臉區域作為感興趣區域輸入網絡
		predict_gender(age_net, src(faces[t]));
	}
	imshow("age-gender-prediction-demo", src);

	waitKey(0);
	return 0;
}

vector<String> ageLabels() {
	vector<String> ages;
	ages.push_back("0-2");
	ages.push_back("4 - 6");
	ages.push_back("8 - 13");
	ages.push_back("15 - 20");
	ages.push_back("25 - 32");
	ages.push_back("38 - 43");
	ages.push_back("48 - 53");
	ages.push_back("60-");
	return ages;
}

void predict_age(Net& net, Mat image) {
	// 輸入
	Mat blob = blobFromImage(image, 1.0, Size(227, 227));
	net.setInput(blob, "data");
	// 預測分類
	Mat prob = net.forward("prob");
	Mat probMat = prob.reshape(1, 1);
	Point classNum;
	double classProb;

	vector<String> ages = ageLabels();
	minMaxLoc(probMat, NULL, &classProb, NULL, &classNum);
	int classidx = classNum.x;
	putText(image, format("age:%s", ages.at(classidx).c_str()), Point(2, 10), FONT_HERSHEY_PLAIN, 0.8, Scalar(0, 0, 255), 1);
}

void predict_gender(Net& net, Mat image) {
	// 輸入
	Mat blob = blobFromImage(image, 1.0, Size(227, 227));
	net.setInput(blob, "data");
	// 預測分類
	Mat prob = net.forward("prob");
	Mat probMat = prob.reshape(1, 1);
	putText(image, format("gender:%s", (probMat.at<float>(0, 0) > probMat.at<float>(0, 1) ? "M" : "F")),
		Point(2, 20), FONT_HERSHEY_PLAIN, 0.8, Scalar(0, 0, 255), 1);
}

結果2展示：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

GOTURN模型實作對象跟蹤

介紹：

GOTURN（Generic Object Tricking Using Regression Networks）使用回歸網絡進行追蹤

資料參考：https://zhuanlan.zhihu.com/p/25338674

算法架構

整個算法的架構其實非常簡單：輸入目前幀和前一幀進入網絡，輸出目前幀bounding-box的位置。

輸入輸出

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

網絡輸出目标在search region上的相對坐标（top-left和bottom-right）。

模型下載下傳：

https://github.com/opencv/opencv_extra/tree/c4219d5eb3105ed8e634278fad312a1a8d2c182d/testdata/tracking

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

note: 這四個壓縮包都得下載下傳，否則會解壓出錯。

可以參考opencv的samples裡邊的例子：https://github.com/opencv/opencv_contrib/blob/3.3.1/modules/tracking/samples/goturnTracker.cpp

該網絡輸入為上一幀要追蹤的區域data1和目前幀區域data2，輸出為單通道4×1的Mat：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

表示上一幀中要追蹤的box在目前幀中預測的box的位置（左上角和右下角坐标）。

輸入：

input: "data1"
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227

input: "data2"
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227

代碼：

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/video/video.hpp>
#include <iostream>
#include <fstream>

using namespace std;
using namespace cv;
using namespace cv::dnn;

string model_file = "model/GOTURN/goturn.caffemodel";
string model_prototxt = "model/GOTURN/goturn.prototxt";

Net net;
Rect trackObjects(Mat& frame, Mat& prevFrame);
Mat frame, prevFrame;
Rect prevBB;
int main(int argc, char** argv) {
	net = readNetFromCaffe(model_prototxt, model_file);
	if (net.empty())
	{
		cout << "could not load model file……";
		exit(-1);
	}
	VideoCapture capture;
	capture.open("pictures/vtest.avi");
	capture.read(frame);
	frame.copyTo(prevFrame);
	prevBB = selectROI(frame, false, false);
	namedWindow("frame", CV_WINDOW_AUTOSIZE);
	while (capture.read(frame)) {
		Rect currentBB = trackObjects(frame, prevFrame);
		rectangle(frame, currentBB, Scalar(0, 0, 255), 2, 8, 0);

		// ready for next frame
		frame.copyTo(prevFrame);
		prevBB.x = currentBB.x;
		prevBB.y = currentBB.y;
		prevBB.width = currentBB.width;
		prevBB.height = currentBB.height;

		imshow("frame", frame);
		char c = waitKey(50);
		if (c == 27) {
			break;
		}
	}
}



Rect trackObjects(Mat& frame, Mat& prevFrame) {
	Rect rect;
	int INPUT_SIZE = 227;
	//Using prevFrame & prevBB from model and curFrame GOTURN calculating curBB
	Mat curFrame = frame.clone();
	Rect2d curBB;

	float padTargetPatch = 2.0;
	Rect2f searchPatchRect, targetPatchRect;
	Point2f currCenter, prevCenter;
	Mat prevFramePadded, curFramePadded;
	Mat searchPatch, targetPatch;

	// 上一幀box的中心
	prevCenter.x = (float)(prevBB.x + prevBB.width / 2);
	prevCenter.y = (float)(prevBB.y + prevBB.height / 2);

	// 接受padTargetPatch倍的背景
	targetPatchRect.width = (float)(prevBB.width * padTargetPatch);
	targetPatchRect.height = (float)(prevBB.height * padTargetPatch);
	targetPatchRect.x = prevCenter.x + targetPatchRect.width / 2.0;	// 這裡因為下面使用的是邊界填充之後的prevFramePadded，等于說又加了個targetPatchRect.width，是以這裡是加targetPatchRect.width / 2.0
	targetPatchRect.y = prevCenter.y + targetPatchRect.height / 2.0;

	// 對上一幀邊界進行填充，并提取出框出的目标targetPatch
	copyMakeBorder(prevFrame, prevFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
	targetPatch = prevFramePadded(targetPatchRect).clone();
	// 對目前幀邊界進行填充，并提取出目标targetPatch
	copyMakeBorder(curFrame, curFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
	searchPatch = curFramePadded(targetPatchRect).clone();

	//Preprocess
	//Resize
	resize(targetPatch, targetPatch, Size(INPUT_SIZE, INPUT_SIZE));
	resize(searchPatch, searchPatch, Size(INPUT_SIZE, INPUT_SIZE));

	//Mean Subtract
	targetPatch = targetPatch - 128;
	searchPatch = searchPatch - 128;

	//Convert to Float type
	targetPatch.convertTo(targetPatch, CV_32F);
	searchPatch.convertTo(searchPatch, CV_32F);

	Mat targetBlob = blobFromImage(targetPatch);
	Mat searchBlob = blobFromImage(searchPatch);

	net.setInput(targetBlob, "data1");
	net.setInput(searchBlob, "data2");

	Mat res = net.forward("scale");
	Mat resMat = res.reshape(1, 1);
	//printf("width : %d, height : %d\n", (resMat.at<float>(2) - resMat.at<float>(0)), (resMat.at<float>(3) - resMat.at<float>(1)));

	curBB.x = (double)targetPatchRect.x + (double)(resMat.at<float>(0) * targetPatchRect.width / INPUT_SIZE) - (double)targetPatchRect.width;
	curBB.y = (double)targetPatchRect.y + (double)(resMat.at<float>(1) * targetPatchRect.height / INPUT_SIZE) - (double)targetPatchRect.height;
	curBB.width = (resMat.at<float>(2) - resMat.at<float>(0)) * targetPatchRect.width / INPUT_SIZE;
	curBB.height = (resMat.at<float>(3) - resMat.at<float>(1)) * targetPatchRect.height / INPUT_SIZE;

	//Predicted BB
	Rect boundingBox = curBB;
	return boundingBox;
}

結果展示：

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

OpenCV學習筆記 - DNN子產品使用(含源碼、詳細解釋)DNN子產品

文章目錄

DNN子產品

Googlenet模型實作圖像分類

介紹：

代碼：

結果展示：

SSD模型實作對象檢測

介紹：

代碼：

結果展示：

MobileNetSSD模型實時對象檢測

介紹：

代碼：

結果展示：

FCN模型圖像分割

介紹：

代碼：

結果展示：

CNN預測年齡和性别

介紹：

代碼1：

結果1展示：

代碼2：

結果2展示：

GOTURN模型實作對象跟蹤

介紹：

算法架構

輸入輸出

代碼：

結果展示：

繼續閱讀