struck 利用結構化SVM來實作視覺跟蹤,在深度學習流行起來之前,struck是視覺跟蹤領域效果最好的方法。深度學習流行之後,利用泛化的卷積特徵能夠得到很好的效果。struck的優點在於,它可以使用任意的特徵來實作跟蹤,所以它可以利用卷積神經網絡提取的特徵,然後結合結構化SVM來實作視覺跟蹤,這樣的效果說不定更好。
struck的源碼是C++實作的,作者寫的很好,思路清晰,代碼結構清晰,而且與論文中的相符,沒有那麼多小trick,結果比較可靠。
下面從它的主函數開始,分析這份源碼是如何實作的:
main.cpp
/*
* Struck: Structured Output Tracking with Kernels
*
* Code to accompany the paper:
* Struck: Structured Output Tracking with Kernels
* Sam Hare, Amir Saffari, Philip H. S. Torr
* International Conference on Computer Vision (ICCV), 2011
*
* Copyright (C) 2011 Sam Hare, Oxford Brookes University, Oxford, UK
*
* This file is part of Struck.
*
* Struck is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Struck is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Struck. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "Tracker.h"
#include "Config.h"

#include <climits>
#include <cstdio>

#include <fstream>
#include <iostream>

#include <opencv/cv.h>
#include <opencv/highgui.h>
using namespace std;
using namespace cv;
static const int kLiveBoxWidth = 80;
static const int kLiveBoxHeight = 80;
// Draw a FloatRect onto rMat in the given colour.
// The float-valued box is first converted to integer pixel coordinates
// (via IntRect) and then forwarded to OpenCV's rectangle drawing routine.
void rectangle(Mat& rMat, const FloatRect& rRect, const Scalar& rColour)
{
    const IntRect ir(rRect);
    cv::rectangle(rMat, Point(ir.XMin(), ir.YMin()), Point(ir.XMax(), ir.YMax()), rColour);
}
int main(int argc, char* argv[])
{
//這幾句話沒啥作用,我給注釋掉
#ifndef WIN32
string programName = argv[0];
programName = programName.substr(programName.find_first_of('/'));
cout << "programName: " << programName << endl;
#endif
// read config file
string configPath = "../docs/config.txt";
Config conf(configPath);//作者定義的類Config 讀取了所有的配置資訊,并且cout輸出
cout << conf << endl;
if (conf.features.size() == 0)
{
cout << "error: no features specified in config" << endl;
return EXIT_FAILURE;
}
if (argc > 1)
{
conf.sequenceName = argv[1];
}
ofstream outFile;//定義一個輸出檔案流,輸出結果
if (conf.resultsPath != "")
{
#ifdef WIN32
string resultsPath = conf.resultsPath + "/" + conf.sequenceName + "_result.txt";
#else
string resultsPath = conf.resultsPath + "/" + conf.sequenceName + "_" + programName + "Result.txt";
#endif
outFile.open(resultsPath, ios::out);
if (!outFile)
{
cout << "error: could not open results file: " << conf.resultsPath << endl;
return EXIT_FAILURE;
}
}
// if no sequence specified then use the camera
bool useCamera = (conf.sequenceName == "");//根據在config.txt中是否給出視訊名稱,判斷是否使用攝像頭
VideoCapture cap;
int startFrame = -1;
int endFrame = -1;
FloatRect initBB;//這是一個模闆類,
string imgFormat;
float scaleW = 1.f;
float scaleH = 1.f;
if (useCamera)//使用攝像頭
{
if (!cap.open(0))
{
cout << "error: could not start camera capture" << endl;
return EXIT_FAILURE;
}
startFrame = 0;
endFrame = INT_MAX;
Mat tmp;
cap >> tmp;//讀入一幀視訊
scaleW = (float)conf.frameWidth/tmp.cols;//config中寬/讀入視訊的寬,比率
scaleH = (float)conf.frameHeight/tmp.rows;
/*該函數,創造了一個矩形,左上角在(120,80),80*80的矩形*/
initBB = IntRect(conf.frameWidth/2-kLiveBoxWidth/2, conf.frameHeight/2-kLiveBoxHeight/2, kLiveBoxWidth, kLiveBoxHeight);
cout << "press 'i' to initialise tracker" << endl;
}
else//使用視訊
{
// parse frames file
string framesFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+"frames.txt";
ifstream framesFile(framesFilePath.c_str(), ios::in);
if (!framesFile)
{
cout << "error: could not open sequence frames file: " << framesFilePath << endl;
return EXIT_FAILURE;
}
string framesLine;
getline(framesFile, framesLine);
printf("%s", framesLine.c_str());
sscanf(framesLine.c_str(), "%d,%d", &startFrame, &endFrame);
if (framesFile.fail() || startFrame == -1 || endFrame == -1)
{
cout << "error: could not parse sequence frames file" << endl;
return EXIT_FAILURE;
}
imgFormat = conf.sequenceBasePath+"/"+conf.sequenceName+"/img/%04d.jpg";//qyy changed
// read first frame to get size
char imgPath[256];
sprintf(imgPath, imgFormat.c_str(), startFrame);
Mat tmp = cv::imread(imgPath, 0);
scaleW = (float)conf.frameWidth/tmp.cols;
scaleH = (float)conf.frameHeight/tmp.rows;
// read init box from ground truth file
string gtFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+"groundtruth_rect.txt";//qyy changed
ifstream gtFile(gtFilePath.c_str(), ios::in);
if (!gtFile)
{
cout << "error: could not open sequence gt file: " << gtFilePath << endl;
return EXIT_FAILURE;
}
string gtLine;
getline(gtFile, gtLine);
float xmin = -1.f;
float ymin = -1.f;
float width = -1.f;
float height = -1.f;
sscanf(gtLine.c_str(), "%f,%f,%f,%f", &xmin, &ymin, &width, &height);
if (gtFile.fail() || xmin < 0.f || ymin < 0.f || width < 0.f || height < 0.f)
{
cout << "error: could not parse sequence gt file" << endl;
return EXIT_FAILURE;
}
initBB = FloatRect(xmin*scaleW, ymin*scaleH, width*scaleW, height*scaleH);
}
Tracker tracker(conf);//使用conf類,初始化Tracker類
if (!conf.quietMode)//quietMode模式下,不顯示結果,隻運算
{
namedWindow("result");
}
Mat result(conf.frameHeight, conf.frameWidth, CV_8UC3);
bool paused = false;
bool doInitialise = false;
srand(conf.seed);
for (int frameInd = startFrame; frameInd <= endFrame; ++frameInd)
{
cout << "frame num is: " << frameInd << endl;//qyy
Mat frame;
if (useCamera)
{
Mat frameOrig;
cap >> frameOrig;
resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
//imshow("result",frame);//qyy
//waitKey(0);//qyy
flip(frame, frame, 1);//作者把視訊左右對稱翻轉了,不知道為什麼這麼做?
//imshow("result", frame);//qyy
//waitKey(0);//qyy
frame.copyTo(result);
if (doInitialise)
{
if (tracker.IsInitialised())
{
tracker.Reset();
}
else
{
tracker.Initialise(frame, initBB);
}
doInitialise = false;
}
else if (!tracker.IsInitialised())
{
rectangle(result, initBB, CV_RGB(255, 255, 255));//沒有初始化,就在result上畫白色框框
}
}
else
{
char imgPath[256];
sprintf(imgPath, imgFormat.c_str(), frameInd);
Mat frameOrig = cv::imread(imgPath, 0);//第二個參數flag指定讀取的顔色類型,=0表示讀取為灰階圖像
cout << "frameOrig.channels: " << frameOrig.channels() << endl;//qyy
if (frameOrig.empty())
{
cout << "error: could not read frame: " << imgPath << endl;
return EXIT_FAILURE;
}
resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
cvtColor(frame, result, CV_GRAY2RGB);//作者讀進來的時候是灰階圖像,為了顯示轉換成3通道都是灰階圖
if (frameInd == startFrame)//如果是第一幀,初始化
{
tracker.Initialise(frame, initBB);
}
}
if (tracker.IsInitialised())//如果初始化了,就開始跟蹤
{
tracker.Track(frame);//跟蹤程式,把tracker當做一個類來對待,很清晰明了啊,贊一個;算法都在這裡面實作
if (!conf.quietMode && conf.debugMode)
{
tracker.Debug();//debug模式下,可以開啟很多額外的視窗顯示
}
rectangle(result, tracker.GetBB(), CV_RGB(0, 255, 0));//使用綠色框,畫出跟蹤的效果
if (outFile)//這裡是得到的矩形框,存儲到txt文本中
{
const FloatRect& bb = tracker.GetBB();
outFile << bb.XMin() / scaleW << "," << bb.YMin() / scaleH << "," << bb.Width() / scaleW << "," << bb.Height() / scaleH << flush << endl;
cout << "cout to file: " << bb.XMin() / scaleW << "," << bb.YMin() / scaleH << "," << bb.Width() / scaleW << "," << bb.Height() / scaleH << endl;
}
}
if (!conf.quietMode)//如果使用的是攝像頭,作者提供了幾個按鍵來選擇是否初始化,我用的是OTB資料集,就不管這個了
{
imshow("result", result);
int key = waitKey(paused ? 0 : 1);
if (key != -1)
{
if (key == 27 || key == 113) // esc q
{
break;
}
else if (key == 112) // p
{
paused = !paused;
}
else if (key == 105 && useCamera)//i
{
doInitialise = true;
cout << "initialised !" << endl;//qyy
}
}
if (conf.debugMode && frameInd == endFrame)
{
cout << "\n\nend of sequence, press any key to exit" << endl;
//waitKey();
}
}
}
if (outFile.is_open())
{
outFile.close();
}
return EXIT_SUCCESS;
}
所以,後面我主要關注tracker這個類做了什麼,我們看到在main.cpp中調用了tracker.Initialise、Debug、Track這幾個成員函數,所以這幾個函數是作者算法實作的關鍵。