轉自:http://blog.csdn.net/zouxy09/article/details/7893081
下面是自己在看論文和這些大牛的分析過程中,對代碼進行了一些了解,但是由于自己接觸圖像處理和機器視覺沒多久,另外由于自己程式設計能力比較弱,所以分析過程可能會有不少的錯誤,希望各位不吝指正。而且,因為程式設計很多地方不懂,所以注釋得非常亂,還請海涵。
TLD.h
[cpp] view plain copy print ?
- #include <opencv2/opencv.hpp>
- #include <tld_utils.h>
- #include <LKTracker.h>
- #include <FerNNClassifier.h>
- #include <fstream>
- //Bounding Boxes
- struct BoundingBox : public cv::Rect {
- BoundingBox(){}
- BoundingBox(cv::Rect r): cv::Rect(r){} //繼承的話需要初始化基類
- public:
- float overlap; //Overlap with current Bounding Box
- int sidx; //scale index
- };
- //Detection structure
- struct DetStruct {
- std::vector<int> bb;
- std::vector<std::vector<int> > patt;
- std::vector<float> conf1;
- std::vector<float> conf2;
- std::vector<std::vector<int> > isin;
- std::vector<cv::Mat> patch;
- };
- //Temporal structure
- struct TempStruct {
- std::vector<std::vector<int> > patt;
- std::vector<float> conf;
- };
- struct OComparator{ //比較兩者重合度
- OComparator(const std::vector<BoundingBox>& _grid):grid(_grid){}
- std::vector<BoundingBox> grid;
- bool operator()(int idx1,int idx2){
- return grid[idx1].overlap > grid[idx2].overlap;
- }
- };
- struct CComparator{ //比較兩者确信度?
- CComparator(const std::vector<float>& _conf):conf(_conf){}
- std::vector<float> conf;
- bool operator()(int idx1,int idx2){
- return conf[idx1]> conf[idx2];
- }
- };
- class TLD{
- private:
- cv::PatchGenerator generator; //PatchGenerator類用來對圖像區域進行仿射變換
- FerNNClassifier classifier;
- LKTracker tracker;
- //下面這些參數通過程式開始運作時讀入parameters.yml檔案進行初始化
- ///Parameters
- int bbox_step;
- int min_win;
- int patch_size;
- //initial parameters for positive examples
- //從第一幀得到的目标的bounding box中(檔案讀取或者使用者框定),經過幾何變換得
- //到 num_closest_init * num_warps_init 個正樣本
- int num_closest_init; //最近鄰視窗數 10
- int num_warps_init; //幾何變換數目 20
- int noise_init;
- float angle_init;
- float shift_init;
- float scale_init;
- //從跟蹤得到的目标的bounding box中,經過幾何變換更新正樣本(添加到線上模型?)
- //update parameters for positive examples
- int num_closest_update;
- int num_warps_update;
- int noise_update;
- float angle_update;
- float shift_update;
- float scale_update;
- //parameters for negative examples
- float bad_overlap;
- float bad_patches;
- ///Variables
- //Integral Images 積分圖像,用以計算2bitBP特征(類似于haar特征的計算)
- //Mat最大的優勢跟STL很相似,都是對記憶體進行動态的管理,不需要之前使用者手動的管理記憶體
- cv::Mat iisum;
- cv::Mat iisqsum;
- float var;
- //Training data
- //std::pair主要的作用是将兩個資料組合成一個資料,兩個資料可以是同一類型或者不同類型。
- //pair實質上是一個結構體,其主要的兩個成員變量是first和second,這兩個變量可以直接使用。
- //在這裡用來表示樣本,first成員為 features 特征點數組,second成員為 labels 樣本類别标簽
- std::vector<std::pair<std::vector<int>,int> > pX; //positive ferns <features,labels=1> 正樣本
- std::vector<std::pair<std::vector<int>,int> > nX; // negative ferns <features,labels=0> 負樣本
- cv::Mat pEx; //positive NN example
- std::vector<cv::Mat> nEx; //negative NN examples
- //Test data
- std::vector<std::pair<std::vector<int>,int> > nXT; //negative data to Test
- std::vector<cv::Mat> nExT; //negative NN examples to Test
- //Last frame data
- BoundingBox lastbox;
- bool lastvalid;
- float lastconf;
- //Current frame data
- //Tracker data
- bool tracked;
- BoundingBox tbb;
- bool tvalid;
- float tconf;
- //Detector data
- TempStruct tmp;
- DetStruct dt;
- std::vector<BoundingBox> dbb;
- std::vector<bool> dvalid; //檢測有效性??
- std::vector<float> dconf; //檢測确信度??
- bool detected;
- //Bounding Boxes
- std::vector<BoundingBox> grid;
- std::vector<cv::Size> scales;
- std::vector<int> good_boxes; //indexes of bboxes with overlap > 0.6
- std::vector<int> bad_boxes; //indexes of bboxes with overlap < 0.2
- BoundingBox bbhull; // hull of good_boxes //good_boxes 的 殼,也就是視窗的邊框
- BoundingBox best_box; // maximum overlapping bbox
- public:
- //Constructors
- TLD();
- TLD(const cv::FileNode& file);
- void read(const cv::FileNode& file);
- //Methods
- void init(const cv::Mat& frame1,const cv::Rect &box, FILE* bb_file);
- void generatePositiveData(const cv::Mat& frame, int num_warps);
- void generateNegativeData(const cv::Mat& frame);
- void processFrame(const cv::Mat& img1,const cv::Mat& img2,std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2,
- BoundingBox& bbnext,bool& lastboxfound, bool tl,FILE* bb_file);
- void track(const cv::Mat& img1, const cv::Mat& img2,std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2);
- void detect(const cv::Mat& frame);
- void clusterConf(const std::vector<BoundingBox>& dbb,const std::vector<float>& dconf,std::vector<BoundingBox>& cbb,std::vector<float>& cconf);
- void evaluate();
- void learn(const cv::Mat& img);
- //Tools
- void buildGrid(const cv::Mat& img, const cv::Rect& box);
- float bbOverlap(const BoundingBox& box1,const BoundingBox& box2);
- void getOverlappingBoxes(const cv::Rect& box1,int num_closest);
- void getBBHull();
- void getPattern(const cv::Mat& img, cv::Mat& pattern,cv::Scalar& mean,cv::Scalar& stdev);
- void bbPoints(std::vector<cv::Point2f>& points, const BoundingBox& bb);
- void bbPredict(const std::vector<cv::Point2f>& points1,const std::vector<cv::Point2f>& points2,
- const BoundingBox& bb1,BoundingBox& bb2);
- double getVar(const BoundingBox& box,const cv::Mat& sum,const cv::Mat& sqsum);
- bool bbComp(const BoundingBox& bb1,const BoundingBox& bb2);
- int clusterBB(const std::vector<BoundingBox>& dbb,std::vector<int>& indexes);
- };
#include <opencv2/opencv.hpp>
#include <tld_utils.h>
#include <LKTracker.h>
#include <FerNNClassifier.h>
#include <fstream>
//Bounding Boxes
struct BoundingBox : public cv::Rect {
BoundingBox(){}
BoundingBox(cv::Rect r): cv::Rect(r){} //繼承的話需要初始化基類
public:
float overlap; //Overlap with current Bounding Box
int sidx; //scale index
};
// Detection structure: per-frame results for the windows that survive the
// first two detector stages. All vectors are indexed by detection number.
struct DetStruct {
std::vector<int> bb; // indices into the scan-window grid of the surviving windows
std::vector<std::vector<int> > patt; // corresponding codes of the ensemble classifier
std::vector<float> conf1; // relative similarity (for the final nearest-neighbour classifier)
std::vector<float> conf2; // conservative similarity (for integration with the tracker)
std::vector<std::vector<int> > isin; // nearest-neighbour verdict: detected (1) or rejected (0); entries start at -1
std::vector<cv::Mat> patch; // corresponding normalised image patches
};
// Temporal structure: scratch results kept for every window of the scan grid
// (both vectors are indexed by grid position).
struct TempStruct {
std::vector<std::vector<int> > patt; // fern feature codes last computed for each window
std::vector<float> conf; // ensemble-classifier measure per window; 0 when the window fails the variance test
};
struct OComparator{ //比較兩者重合度
OComparator(const std::vector<BoundingBox>& _grid):grid(_grid){}
std::vector<BoundingBox> grid;
bool operator()(int idx1,int idx2){
return grid[idx1].overlap > grid[idx2].overlap;
}
};
// Orders indices by decreasing confidence, i.e. cmp(a, b) is true when
// conf[a] is strictly greater than conf[b].
struct CComparator {
  // Keeps its own copy of the confidence values, so the comparator stays
  // valid independently of the vector it was built from.
  CComparator(const std::vector<float>& _conf) : conf(_conf) {}
  std::vector<float> conf;
  // const-qualified so the comparator can be invoked through const objects
  // and references, as standard algorithms are allowed to do.
  bool operator()(int idx1, int idx2) const {
    return conf[idx1] > conf[idx2];
  }
};
// TLD: combines a frame-to-frame tracker, a cascaded detector and an online
// learning step; processFrame() runs the three in that order.
class TLD{
private:
  cv::PatchGenerator generator; // applies affine warps to image regions
  FerNNClassifier classifier;
  LKTracker tracker;

  // The parameters below are loaded by read() from the configuration node
  // (the parameters.yml file read at program start).
  ///Parameters
  int bbox_step;  // NOTE: not read from the file; init() sets it to 7
  int min_win;
  int patch_size;
  // Initial parameters for positive examples: the first-frame target box
  // (read from file or drawn by the user) is warped geometrically to obtain
  // num_closest_init * num_warps_init positive samples.
  int num_closest_init; // number of closest windows (10)
  int num_warps_init;   // number of geometric warps (20)
  int noise_init;
  float angle_init;
  float shift_init;
  float scale_init;
  // Update parameters for positive examples: positives are regenerated by
  // warping the tracked bounding box (presumably to extend the online model).
  int num_closest_update;
  int num_warps_update;
  int noise_update;
  float angle_update;
  float shift_update;
  float scale_update;
  // Parameters for negative examples
  float bad_overlap;
  int bad_patches;  // a count: read() assigns it from an int, and it sizes the negative NN example set

  ///Variables
  // Integral images, used to compute window variance in getVar().
  cv::Mat iisum;
  cv::Mat iisqsum;
  float var;  // variance threshold for the first detector stage (set in init())

  // Training data. Each fern sample is a <features, label> pair: `first`
  // holds the feature codes and `second` the class label.
  std::vector<std::pair<std::vector<int>,int> > pX; // positive ferns <features,labels=1>
  std::vector<std::pair<std::vector<int>,int> > nX; // negative ferns <features,labels=0>
  cv::Mat pEx;              // positive NN example
  std::vector<cv::Mat> nEx; // negative NN examples
  // Test data
  std::vector<std::pair<std::vector<int>,int> > nXT; // negative data to Test
  std::vector<cv::Mat> nExT;                         // negative NN examples to Test

  // Last frame data
  BoundingBox lastbox;
  bool lastvalid;
  float lastconf;
  // Current frame data
  // Tracker data
  bool tracked;
  BoundingBox tbb;
  bool tvalid;
  float tconf;
  // Detector data
  TempStruct tmp;
  DetStruct dt;
  std::vector<BoundingBox> dbb; // windows accepted by all detector stages
  std::vector<bool> dvalid;     // detection validity (not used in the visible code)
  std::vector<float> dconf;     // conservative similarity of each entry of dbb
  bool detected;

  // Bounding Boxes
  std::vector<BoundingBox> grid;   // every scan window
  std::vector<cv::Size> scales;    // scan-window sizes, passed to classifier.prepare()
  std::vector<int> good_boxes; // indexes of bboxes with overlap > 0.6
  std::vector<int> bad_boxes;  // indexes of bboxes with overlap < 0.2
  BoundingBox bbhull;   // hull of good_boxes, i.e. the rectangle enclosing them
  BoundingBox best_box; // maximum overlapping bbox
public:
  // Constructors
  TLD();
  TLD(const cv::FileNode& file);  // calls read(file)
  // Loads the parameters above from `file` and forwards it to the classifier.
  void read(const cv::FileNode& file);

  // Methods
  // Builds the grid, generates the initial training data from the first frame
  // and trains the classifiers; writes the initial box to bb_file.
  void init(const cv::Mat& frame1,const cv::Rect &box, FILE* bb_file);
  // Fills pEx and pX from best_box / good_boxes, using num_warps warps.
  void generatePositiveData(const cv::Mat& frame, int num_warps);
  // Fills nX and nEx from bad_boxes.
  void generateNegativeData(const cv::Mat& frame);
  // Runs track -> detect -> integrate -> learn on the frame pair and reports
  // the resulting box through bbnext / lastboxfound; `tl` enables tracking
  // and learning.
  void processFrame(const cv::Mat& img1,const cv::Mat& img2,std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2,
                    BoundingBox& bbnext,bool& lastboxfound, bool tl,FILE* bb_file);
  // Tracks lastbox from img1 to img2; results go to tbb / tconf / tvalid / tracked.
  void track(const cv::Mat& img1, const cv::Mat& img2,std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2);
  // Runs the cascaded detector over the grid; results go to dbb / dconf / detected.
  void detect(const cv::Mat& frame);
  // Clusters the detections dbb/dconf into cbb/cconf.
  void clusterConf(const std::vector<BoundingBox>& dbb,const std::vector<float>& dconf,std::vector<BoundingBox>& cbb,std::vector<float>& cconf);
  void evaluate();
  // Online learning step around lastbox.
  void learn(const cv::Mat& img);

  // Tools
  // Builds all scan windows over img and their overlap with box.
  void buildGrid(const cv::Mat& img, const cv::Rect& box);
  float bbOverlap(const BoundingBox& box1,const BoundingBox& box2);
  // Fills good_boxes (the num_closest most overlapping windows) and bad_boxes.
  void getOverlappingBoxes(const cv::Rect& box1,int num_closest);
  void getBBHull();
  // Resizes img to patch_size x patch_size and subtracts its mean.
  void getPattern(const cv::Mat& img, cv::Mat& pattern,cv::Scalar& mean,cv::Scalar& stdev);
  // Appends a regular grid of points covering bb to `points`.
  void bbPoints(std::vector<cv::Point2f>& points, const BoundingBox& bb);
  // Predicts bb2 from bb1 using the point displacements points1 -> points2.
  void bbPredict(const std::vector<cv::Point2f>& points1,const std::vector<cv::Point2f>& points2,
                 const BoundingBox& bb1,BoundingBox& bb2);
  // Variance of the pixels inside box, computed from the integral images.
  double getVar(const BoundingBox& box,const cv::Mat& sum,const cv::Mat& sqsum);
  bool bbComp(const BoundingBox& bb1,const BoundingBox& bb2);
  int clusterBB(const std::vector<BoundingBox>& dbb,std::vector<int>& indexes);
};
TLD.cpp
[cpp] view plain copy print ?
- #include <TLD.h>
- #include <stdio.h>
- using namespace cv;
- using namespace std;
- TLD::TLD()
- {
- }
- TLD::TLD(const FileNode& file){
- read(file);
- }
- void TLD::read(const FileNode& file){
- ///Bounding Box Parameters
- min_win = (int)file["min_win"];
- ///Genarator Parameters
- //initial parameters for positive examples
- patch_size = (int)file["patch_size"];
- num_closest_init = (int)file["num_closest_init"];
- num_warps_init = (int)file["num_warps_init"];
- noise_init = (int)file["noise_init"];
- angle_init = (float)file["angle_init"];
- shift_init = (float)file["shift_init"];
- scale_init = (float)file["scale_init"];
- //update parameters for positive examples
- num_closest_update = (int)file["num_closest_update"];
- num_warps_update = (int)file["num_warps_update"];
- noise_update = (int)file["noise_update"];
- angle_update = (float)file["angle_update"];
- shift_update = (float)file["shift_update"];
- scale_update = (float)file["scale_update"];
- //parameters for negative examples
- bad_overlap = (float)file["overlap"];
- bad_patches = (int)file["num_patches"];
- classifier.read(file);
- }
- //此函數完成準備工作
- void TLD::init(const Mat& frame1, const Rect& box, FILE* bb_file){
- //bb_file = fopen("bounding_boxes.txt","w");
- //Get Bounding Boxes
- //此函數根據傳入的box(目标邊界框)在傳入的圖像frame1中建構全部的掃描視窗,并計算重疊度
- buildGrid(frame1, box);
- printf("Created %d bounding boxes\n",(int)grid.size()); //vector的成員size()用于擷取向量元素的個數
- ///Preparation
- //allocation
- //積分圖像,用以計算2bitBP特征(類似于haar特征的計算)
- //Mat的建立,方式有兩種:1.調用create(行,列,類型)2.Mat(行,列,類型(值))。
- iisum.create(frame1.rows+1, frame1.cols+1, CV_32F);
- iisqsum.create(frame1.rows+1, frame1.cols+1, CV_64F);
- //Detector data中定義:std::vector<float> dconf; 檢測确信度??
- //vector 的reserve增加了vector的capacity,但是它的size沒有改變!而resize改變了vector
- //的capacity同時也增加了它的size!reserve是容器預留白間,但在空間内不真正建立元素對象,
- //是以在沒有添加新的對象之前,不能引用容器内的元素。
- //不管是調用resize還是reserve,二者對容器原有的元素都沒有影響。
- //myVec.reserve( 100 ); // 新元素還沒有構造, 此時不能用[]通路元素
- //myVec.resize( 100 ); // 用元素的預設構造函數構造了100個新的元素,可以直接操作新元素
- dconf.reserve(100);
- dbb.reserve(100);
- bbox_step =7;
- //以下在Detector data中定義的容器都給其配置設定grid.size()大小(這個是一幅圖像中全部的掃描視窗個數)的容量
- //Detector data中定義TempStruct tmp;
- //tmp.conf.reserve(grid.size());
- tmp.conf = vector<float>(grid.size());
- tmp.patt = vector<vector<int> >(grid.size(), vector<int>(10,0));
- //tmp.patt.reserve(grid.size());
- dt.bb.reserve(grid.size());
- good_boxes.reserve(grid.size());
- bad_boxes.reserve(grid.size());
- //TLD中定義:cv::Mat pEx; //positive NN example 大小為15*15圖像片
- pEx.create(patch_size, patch_size, CV_64F);
- //Init Generator
- //TLD中定義:cv::PatchGenerator generator; //PatchGenerator類用來對圖像區域進行仿射變換
- generator = PatchGenerator (0,0,noise_init,true,1-scale_init,1+scale_init,-angle_init*CV_PI/180,
- angle_init*CV_PI/180,-angle_init*CV_PI/180,angle_init*CV_PI/180);
- //此函數根據傳入的box(目标邊界框),在整幀圖像中的全部視窗中尋找與該box距離最小(即最相似,
- //重疊度最大)的num_closest_init個視窗,然後把這些視窗 歸入good_boxes容器
- //同時,把重疊度小于0.2的,歸入 bad_boxes 容器
- //首先根據overlap的比例資訊選出重複區域比例大于60%并且前num_closet_init= 10個的最接近box的RectBox,
- //相當于對RectBox進行篩選。并通過BBhull函數得到這些RectBox的最大邊界。
- getOverlappingBoxes(box, num_closest_init);
- printf("Found %d good boxes, %d bad boxes\n",(int)good_boxes.size(),(int)bad_boxes.size());
- printf("Best Box: %d %d %d %d\n",best_box.x, best_box.y, best_box.width, best_box.height);
- printf("Bounding box hull: %d %d %d %d\n", bbhull.x, bbhull.y, bbhull.width, bbhull.height);
- //Correct Bounding Box
- lastbox=best_box;
- lastconf=1;
- lastvalid=true;
- fprintf(bb_file,"%d,%d,%d,%d,%f\n",lastbox.x,lastbox.y,lastbox.br().x,lastbox.br().y,lastconf);
- //Prepare Classifier 準備分類器
- //scales容器裡是所有掃描視窗的尺度,由buildGrid()函數初始化
- classifier.prepare(scales);
- ///Generate Data
- // Generate positive data
- generatePositiveData(frame1, num_warps_init);
- // Set variance threshold
- Scalar stdev, mean;
- //統計best_box的均值和标準差
- 例如需要提取圖像A的某個ROI(感興趣區域,由矩形框)的話,用Mat類的B=img(ROI)即可提取
- //frame1(best_box)就表示在frame1中提取best_box區域(目标區域)的圖像片
- meanStdDev(frame1(best_box), mean, stdev);
- //利用積分圖像去計算每個待檢測視窗的方差
- //cvIntegral( const CvArr* image, CvArr* sum, CvArr* sqsum=NULL, CvArr* tilted_sum=NULL );
- //計算積分圖像,輸入圖像,sum積分圖像, W+1×H+1,sqsum對象素值平方的積分圖像,tilted_sum旋轉45度的積分圖像
- //利用積分圖像,可以計算在某象素的上-右方的或者旋轉的矩形區域中進行求和、求均值以及标準方差的計算,
- //并且保證運算的複雜度為O(1)。
- integral(frame1, iisum, iisqsum);
- //級聯分類器子產品一:方差檢測子產品,利用積分圖計算每個待檢測視窗的方差,方差大于var門檻值(目标patch方差的50%)的,
- //則認為其含有前景目标方差;var 為标準差的平方
- var = pow(stdev.val[0],2) * 0.5; //getVar(best_box,iisum,iisqsum);
- cout << "variance: " << var << endl;
- //check variance
- //getVar函數通過積分圖像計算輸入的best_box的方差
- double vr = getVar(best_box, iisum, iisqsum)*0.5;
- cout << "check variance: " << vr << endl;
- // Generate negative data
- generateNegativeData(frame1);
- //Split Negative Ferns into Training and Testing sets (they are already shuffled)
- //将負樣本放進 訓練和測試集
- int half = (int)nX.size()*0.5f;
- //vector::assign函數将區間[start, end)中的值指派給目前的vector.
- //将一半的負樣本集 作為 測試集
- nXT.assign(nX.begin()+half, nX.end()); //nXT; //negative data to Test
- //然後将剩下的一半作為訓練集
- nX.resize(half);
- ///Split Negative NN Examples into Training and Testing sets
- half = (int)nEx.size()*0.5f;
- nExT.assign(nEx.begin()+half,nEx.end());
- nEx.resize(half);
- //Merge Negative Data with Positive Data and shuffle it
- //将負樣本和正樣本合并,然後打亂
- vector<pair<vector<int>,int> > ferns_data(nX.size()+pX.size());
- vector<int> idx = index_shuffle(0, ferns_data.size());
- int a=0;
- for (int i=0;i<pX.size();i++){
- ferns_data[idx[a]] = pX[i];
- a++;
- }
- for (int i=0;i<nX.size();i++){
- ferns_data[idx[a]] = nX[i];
- a++;
- }
- //Data already have been shuffled, just putting it in the same vector
- vector<cv::Mat> nn_data(nEx.size()+1);
- nn_data[0] = pEx;
- for (int i=0;i<nEx.size();i++){
- nn_data[i+1]= nEx[i];
- }
- ///Training
- //訓練 集合分類器(森林) 和 最近鄰分類器
- classifier.trainF(ferns_data, 2); //bootstrap = 2
- classifier.trainNN(nn_data);
- ///Threshold Evaluation on testing sets
- //用樣本在上面得到的 集合分類器(森林) 和 最近鄰分類器 中分類,評價得到最好的門檻值
- classifier.evaluateTh(nXT, nExT);
- }
- void TLD::generatePositiveData(const Mat& frame, int num_warps){
- Scalar mean; //均值
- Scalar stdev; //标準差
- //此函數将frame圖像best_box區域的圖像片歸一化為均值為0的15*15大小的patch,存在pEx正樣本中
- getPattern(frame(best_box), pEx, mean, stdev);
- //Get Fern features on warped patches
- Mat img;
- Mat warped;
- //void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sigmaX, double sigmaY=0,
- // int borderType=BORDER_DEFAULT ) ;
- //功能:對輸入的圖像src進行高斯濾波後用dst輸出。
- //src和dst當然分别是輸入圖像和輸出圖像。Ksize為高斯濾波器模闆大小,sigmaX和sigmaY分别為高斯濾
- //波在橫向和豎向的濾波系數。borderType為邊緣擴充點插值類型。
- //用9*9高斯核模糊輸入幀,存入img 去噪??
- GaussianBlur(frame, img, Size(9,9), 1.5);
- //在img圖像中截取bbhull資訊(bbhull是包含了位置和大小的矩形框)的圖像賦給warped
- //例如需要提取圖像A的某個ROI(感興趣區域,由矩形框)的話,用Mat類的B=img(ROI)即可提取
- warped = img(bbhull);
- RNG& rng = theRNG(); //生成一個随機數
- Point2f pt(bbhull.x + (bbhull.width-1)*0.5f, bbhull.y+(bbhull.height-1)*0.5f); //取矩形框中心的坐标 int i(2)
- //nstructs樹木(由一個特征組建構,每組特征代表圖像塊的不同視圖表示)的個數
- //fern[nstructs] nstructs棵樹的森林的數組??
- vector<int> fern(classifier.getNumStructs());
- pX.clear();
- Mat patch;
- //pX為處理後的RectBox最大邊界處理後的像素資訊,pEx最近鄰的RectBox的Pattern,bbP0為最近鄰的RectBox。
- if (pX.capacity() < num_warps * good_boxes.size())
- pX.reserve(num_warps * good_boxes.size()); //pX正樣本個數為 仿射變換個數 * good_box的個數,故需配置設定至少這麼大的空間
- int idx;
- for (int i=0; i< num_warps; i++){
- if (i>0)
- //PatchGenerator類用來對圖像區域進行仿射變換,先RNG一個随機因子,再調用()運算符産生一個變換後的正樣本。
- generator(frame, pt, warped, bbhull.size(), rng);
- for (int b=0; b < good_boxes.size(); b++){
- idx = good_boxes[b]; //good_boxes容器儲存的是 grid 的索引
- patch = img(grid[idx]); //把img的 grid[idx] 區域(也就是bounding box重疊度高的)這一塊圖像片提取出來
- //getFeatures函數得到輸入的patch的用于樹的節點,也就是特征組的特征fern(13位的二進制代碼)
- classifier.getFeatures(patch, grid[idx].sidx, fern); //grid[idx].sidx 對應的尺度索引
- pX.push_back(make_pair(fern, 1)); //positive ferns <features, labels=1> 正樣本
- }
- }
- printf("Positive examples generated: ferns:%d NN:1\n",(int)pX.size());
- }
- //先對最接近box的RectBox區域得到其patch ,然後将像素資訊轉換為Pattern,
- //具體的說就是歸一化RectBox對應的patch的size(放縮至patch_size = 15*15),将2維的矩陣變成一維的向量資訊,
- //然後将向量資訊均值設為0,調整為zero mean and unit variance(ZMUV)
- //Output: resized Zero-Mean patch
- void TLD::getPattern(const Mat& img, Mat& pattern, Scalar& mean, Scalar& stdev){
- //将img放縮至patch_size = 15*15,存到pattern中
- resize(img, pattern, Size(patch_size, patch_size));
- //計算pattern這個矩陣的均值和标準差
- //Computes a mean value and a standard deviation of matrix elements.
- meanStdDev(pattern, mean, stdev);
- pattern.convertTo(pattern, CV_32F);
- //opencv中Mat的運算符有重載, Mat可以 + Mat; + Scalar; + int / float / double 都可以
- //将矩陣所有元素減去其均值,也就是把patch的均值設為零
- pattern = pattern - mean.val[0];
- }
- void TLD::generateNegativeData(const Mat& frame){
- //由于之前重疊度小于0.2的,都歸入 bad_boxes了,是以數量挺多,下面的函數用于打亂順序,也就是為了
- //後面随機選擇bad_boxes
- random_shuffle(bad_boxes.begin(), bad_boxes.end());//Random shuffle bad_boxes indexes
- int idx;
- //Get Fern Features of the boxes with big variance (calculated using integral images)
- int a=0;
- //int num = std::min((int)bad_boxes.size(),(int)bad_patches*100); //limits the size of bad_boxes to try
- printf("negative data generation started.\n");
- vector<int> fern(classifier.getNumStructs());
- nX.reserve(bad_boxes.size());
- Mat patch;
- for (int j=0;j<bad_boxes.size();j++){ //把方差較大的bad_boxes加入負樣本
- idx = bad_boxes[j];
- if (getVar(grid[idx],iisum,iisqsum)<var*0.5f)
- continue;
- patch = frame(grid[idx]);
- classifier.getFeatures(patch, grid[idx].sidx, fern);
- nX.push_back(make_pair(fern, 0)); //得到負樣本
- a++;
- }
- printf("Negative examples generated: ferns: %d ", a);
- //random_shuffle(bad_boxes.begin(),bad_boxes.begin()+bad_patches);//Randomly selects 'bad_patches' and get the patterns for NN;
- Scalar dum1, dum2;
- //bad_patches = (int)file["num_patches"]; 在參數檔案中 num_patches = 100
- nEx=vector<Mat>(bad_patches);
- for (int i=0;i<bad_patches;i++){
- idx=bad_boxes[i];
- patch = frame(grid[idx]);
- //具體的說就是歸一化RectBox對應的patch的size(放縮至patch_size = 15*15)
- //由于負樣本不需要均值和方差,是以就定義dum,将其舍棄
- getPattern(patch,nEx[i],dum1,dum2);
- }
- printf("NN: %d\n",(int)nEx.size());
- }
- //該函數通過積分圖像計算輸入的box的方差
- double TLD::getVar(const BoundingBox& box, const Mat& sum, const Mat& sqsum){
- double brs = sum.at<int>(box.y+box.height, box.x+box.width);
- double bls = sum.at<int>(box.y+box.height, box.x);
- double trs = sum.at<int>(box.y,box.x + box.width);
- double tls = sum.at<int>(box.y,box.x);
- double brsq = sqsum.at<double>(box.y+box.height,box.x+box.width);
- double blsq = sqsum.at<double>(box.y+box.height,box.x);
- double trsq = sqsum.at<double>(box.y,box.x+box.width);
- double tlsq = sqsum.at<double>(box.y,box.x);
- double mean = (brs+tls-trs-bls)/((double)box.area());
- double sqmean = (brsq+tlsq-trsq-blsq)/((double)box.area());
- //方差=E(X^2)-(EX)^2 EX表示均值
- return sqmean-mean*mean;
- }
- void TLD::processFrame(const cv::Mat& img1,const cv::Mat& img2,vector<Point2f>& points1,vector<Point2f>& points2,BoundingBox& bbnext, bool& lastboxfound, bool tl, FILE* bb_file){
- vector<BoundingBox> cbb;
- vector<float> cconf;
- int confident_detections=0;
- int didx; //detection index
- ///Track 跟蹤子產品
- if(lastboxfound && tl){ //tl: train and learn
- //跟蹤
- track(img1, img2, points1, points2);
- }
- else{
- tracked = false;
- }
- ///Detect 檢測子產品
- detect(img2);
- ///Integration 綜合子產品
- //TLD隻跟蹤單目标,是以綜合子產品綜合跟蹤器跟蹤到的單個目标和檢測器檢測到的多個目标,然後隻輸出保守相似度最大的一個目标
- if (tracked){
- bbnext=tbb;
- lastconf=tconf; //表示相關相似度的門檻值
- lastvalid=tvalid; //表示保守相似度的門檻值
- printf("Tracked\n");
- if(detected){ // if Detected
- //通過 重疊度 對檢測器檢測到的目标bounding box進行聚類,每個類其重疊度小于0.5
- clusterConf(dbb, dconf, cbb, cconf); // cluster detections
- printf("Found %d clusters\n",(int)cbb.size());
- for (int i=0;i<cbb.size();i++){
- //找到與跟蹤器跟蹤到的box距離比較遠的類(檢測器檢測到的box),而且它的相關相似度比跟蹤器的要大
- if (bbOverlap(tbb, cbb[i])<0.5 && cconf[i]>tconf){ // Get index of a clusters that is far from tracker and are more confident than the tracker
- confident_detections++; //記錄滿足上述條件,也就是可信度比較高的目标box的個數
- didx=i; //detection index
- }
- }
- //如果隻有一個滿足上述條件的box,那麼就用這個目标box來重新初始化跟蹤器(也就是用檢測器的結果去糾正跟蹤器)
- if (confident_detections==1){ //if there is ONE such a cluster, re-initialize the tracker
- printf("Found a better match..reinitializing tracking\n");
- bbnext=cbb[didx];
- lastconf=cconf[didx];
- lastvalid=false;
- }
- else {
- printf("%d confident cluster was found\n", confident_detections);
- int cx=0,cy=0,cw=0,ch=0;
- int close_detections=0;
- for (int i=0;i<dbb.size();i++){
- //找到檢測器檢測到的box與跟蹤器預測到的box距離很近(重疊度大于0.7)的box,對其坐标和大小進行累加
- if(bbOverlap(tbb,dbb[i])>0.7){ // Get mean of close detections
- cx += dbb[i].x;
- cy +=dbb[i].y;
- cw += dbb[i].width;
- ch += dbb[i].height;
- close_detections++; //記錄最近鄰box的個數
- printf("weighted detection: %d %d %d %d\n",dbb[i].x,dbb[i].y,dbb[i].width,dbb[i].height);
- }
- }
- if (close_detections>0){
- //對與跟蹤器預測到的box距離很近的box 和 跟蹤器本身預測到的box 進行坐标與大小的平均作為最終的
- //目标bounding box,但是跟蹤器的權值較大
- bbnext.x = cvRound((float)(10*tbb.x+cx)/(float)(10+close_detections)); // weighted average trackers trajectory with the close detections
- bbnext.y = cvRound((float)(10*tbb.y+cy)/(float)(10+close_detections));
- bbnext.width = cvRound((float)(10*tbb.width+cw)/(float)(10+close_detections));
- bbnext.height = cvRound((float)(10*tbb.height+ch)/(float)(10+close_detections));
- printf("Tracker bb: %d %d %d %d\n",tbb.x,tbb.y,tbb.width,tbb.height);
- printf("Average bb: %d %d %d %d\n",bbnext.x,bbnext.y,bbnext.width,bbnext.height);
- printf("Weighting %d close detection(s) with tracker..\n",close_detections);
- }
- else{
- printf("%d close detections were found\n",close_detections);
- }
- }
- }
- }
- else{ // If NOT tracking
- printf("Not tracking..\n");
- lastboxfound = false;
- lastvalid = false;
- //如果跟蹤器沒有跟蹤到目标,但是檢測器檢測到了一些可能的目标box,那麼同樣對其進行聚類,但隻是簡單的
- //将聚類的cbb[0]作為新的跟蹤目标box(不比較相似度了??還是裡面已經排好序了??),重新初始化跟蹤器
- if(detected){ // and detector is defined
- clusterConf(dbb,dconf,cbb,cconf); // cluster detections
- printf("Found %d clusters\n",(int)cbb.size());
- if (cconf.size()==1){
- bbnext=cbb[0];
- lastconf=cconf[0];
- printf("Confident detection..reinitializing tracker\n");
- lastboxfound = true;
- }
- }
- }
- lastbox=bbnext;
- if (lastboxfound)
- fprintf(bb_file,"%d,%d,%d,%d,%f\n",lastbox.x,lastbox.y,lastbox.br().x,lastbox.br().y,lastconf);
- else
- fprintf(bb_file,"NaN,NaN,NaN,NaN,NaN\n");
- ///learn 學習子產品
- if (lastvalid && tl)
- learn(img2);
- }
- void TLD::track(const Mat& img1, const Mat& img2, vector<Point2f>& points1, vector<Point2f>& points2){
- //Generate points
- //網格均勻撒點(均勻采樣),在lastbox中共産生最多10*10=100個特征點,存于points1
- bbPoints(points1, lastbox);
- if (points1.size()<1){
- printf("BB= %d %d %d %d, Points not generated\n",lastbox.x,lastbox.y,lastbox.width,lastbox.height);
- tvalid=false;
- tracked=false;
- return;
- }
- vector<Point2f> points = points1;
- //Frame-to-frame tracking with forward-backward error cheking
- //trackf2f函數完成:跟蹤、計算FB error和比對相似度sim,然後篩選出 FB_error[i] <= median(FB_error) 和
- //sim_error[i] > median(sim_error) 的特征點(跟蹤結果不好的特征點),剩下的是不到50%的特征點
- tracked = tracker.trackf2f(img1, img2, points, points2);
- if (tracked){
- //Bounding box prediction
- //利用剩下的這不到一半的跟蹤點輸入來預測bounding box在目前幀的位置和大小 tbb
- bbPredict(points, points2, lastbox, tbb);
- //跟蹤失敗檢測:如果FB error的中值大于10個像素(經驗值),或者預測到的目前box的位置移出圖像,則
- //認為跟蹤錯誤,此時不傳回bounding box;Rect::br()傳回的是右下角的坐标
- //getFB()傳回的是FB error的中值
- if (tracker.getFB()>10 || tbb.x>img2.cols || tbb.y>img2.rows || tbb.br().x < 1 || tbb.br().y <1){
- tvalid =false; //too unstable prediction or bounding box out of image
- tracked = false;
- printf("Too unstable predictions FB error=%f\n", tracker.getFB());
- return;
- }
- //Estimate Confidence and Validity
- //評估跟蹤确信度和有效性
- Mat pattern;
- Scalar mean, stdev;
- BoundingBox bb;
- bb.x = max(tbb.x,0);
- bb.y = max(tbb.y,0);
- bb.width = min(min(img2.cols-tbb.x,tbb.width), min(tbb.width, tbb.br().x));
- bb.height = min(min(img2.rows-tbb.y,tbb.height),min(tbb.height,tbb.br().y));
- //歸一化img2(bb)對應的patch的size(放縮至patch_size = 15*15),存入pattern
- getPattern(img2(bb),pattern,mean,stdev);
- vector<int> isin;
- float dummy;
- //計算圖像片pattern到線上模型M的保守相似度
- classifier.NNConf(pattern,isin,dummy,tconf); //Conservative Similarity
- tvalid = lastvalid;
- //保守相似度大于門檻值,則評估跟蹤有效
- if (tconf>classifier.thr_nn_valid){
- tvalid =true;
- }
- }
- else
- printf("No points tracked\n");
- }
- //網格均勻撒點,box共10*10=100個特征點
- void TLD::bbPoints(vector<cv::Point2f>& points, const BoundingBox& bb){
- int max_pts=10;
- int margin_h=0; //采樣邊界
- int margin_v=0;
- //網格均勻撒點
- int stepx = ceil((bb.width-2*margin_h)/max_pts); //ceil傳回大于或者等于指定表達式的最小整數
- int stepy = ceil((bb.height-2*margin_v)/max_pts);
- //網格均勻撒點,box共10*10=100個特征點
- for (int y=bb.y+margin_v; y<bb.y+bb.height-margin_v; y+=stepy){
- for (int x=bb.x+margin_h;x<bb.x+bb.width-margin_h;x+=stepx){
- points.push_back(Point2f(x,y));
- }
- }
- }
- //利用剩下的這不到一半的跟蹤點輸入來預測bounding box在目前幀的位置和大小
- void TLD::bbPredict(const vector<cv::Point2f>& points1,const vector<cv::Point2f>& points2,
- const BoundingBox& bb1,BoundingBox& bb2) {
- int npoints = (int)points1.size();
- vector<float> xoff(npoints); //位移
- vector<float> yoff(npoints);
- printf("tracked points : %d\n", npoints);
- for (int i=0;i<npoints;i++){ //計算每個特征點在兩幀之間的位移
- xoff[i]=points2[i].x - points1[i].x;
- yoff[i]=points2[i].y - points1[i].y;
- }
- float dx = median(xoff); //計算位移的中值
- float dy = median(yoff);
- float s;
- //計算bounding box尺度scale的變化:通過計算 目前特征點互相間的距離 與 先前(上一幀)特征點互相間的距離 的
- //比值,以比值的中值作為尺度的變化因子
- if (npoints>1){
- vector<float> d;
- d.reserve(npoints*(npoints-1)/2); //等差數列求和:1+2+...+(npoints-1)
- for (int i=0;i<npoints;i++){
- for (int j=i+1;j<npoints;j++){
- //計算 目前特征點互相間的距離 與 先前(上一幀)特征點互相間的距離 的比值(位移用絕對值)
- d.push_back(norm(points2[i]-points2[j])/norm(points1[i]-points1[j]));
- }
- }
- s = median(d);
- }
- else {
- s = 1.0;
- }
- float s1 = 0.5*(s-1)*bb1.width;
- float s2 = 0.5*(s-1)*bb1.height;
- printf("s= %f s1= %f s2= %f \n", s, s1, s2);
- //得到目前bounding box的位置與大小資訊
- //目前box的x坐标 = 前一幀box的x坐标 + 全部特征點位移的中值(可了解為box移動近似的位移) - 目前box寬的一半
- bb2.x = round( bb1.x + dx - s1);
- bb2.y = round( bb1.y + dy -s2);
- bb2.width = round(bb1.width*s);
- bb2.height = round(bb1.height*s);
- printf("predicted bb: %d %d %d %d\n",bb2.x,bb2.y,bb2.br().x,bb2.br().y);
- }
- void TLD::detect(const cv::Mat& frame){
- //cleaning
- dbb.clear();
- dconf.clear();
- dt.bb.clear();
- //GetTickCount傳回從作業系統啟動到現在所經過的時間
- double t = (double)getTickCount();
- Mat img(frame.rows, frame.cols, CV_8U);
- integral(frame,iisum,iisqsum); //計算frame的積分圖
- GaussianBlur(frame,img,Size(9,9),1.5); //高斯模糊,去噪?
- int numtrees = classifier.getNumStructs();
- float fern_th = classifier.getFernTh(); //getFernTh()傳回thr_fern; 集合分類器的分類門檻值
- vector <int> ferns(10);
- float conf;
- int a=0;
- Mat patch;
- //級聯分類器子產品一:方差檢測子產品,利用積分圖計算每個待檢測視窗的方差,方差大于var門檻值(目标patch方差的50%)的,
- //則認為其含有前景目标
- for (int i=0; i<grid.size(); i++){ //FIXME: BottleNeck 瓶頸
- if (getVar(grid[i],iisum,iisqsum) >= var){ //計算每一個掃描視窗的方差
- a++;
- //級聯分類器子產品二:集合分類器檢測子產品
- patch = img(grid[i]);
- classifier.getFeatures(patch,grid[i].sidx,ferns); //得到該patch特征(13位的二進制代碼)
- conf = classifier.measure_forest(ferns); //計算該特征值對應的後驗機率累加值
- tmp.conf[i]=conf; //Detector data中定義TempStruct tmp;
- tmp.patt[i]=ferns;
- //如果集合分類器的後驗機率的平均值大于門檻值fern_th(由訓練得到),就認為含有前景目标
- if (conf > numtrees*fern_th){
- dt.bb.push_back(i); //将通過以上兩個檢測子產品的掃描視窗記錄在detect structure中
- }
- }
- else
- tmp.conf[i]=0.0;
- }
- int detections = dt.bb.size();
- printf("%d Bounding boxes passed the variance filter\n",a);
- printf("%d Initial detection from Fern Classifier\n", detections);
- //如果通過以上兩個檢測子產品的掃描視窗數大于100個,則隻取後驗機率大的前100個
- if (detections>100){ //CComparator(tmp.conf)指定比較方式???
- nth_element(dt.bb.begin(), dt.bb.begin()+100, dt.bb.end(), CComparator(tmp.conf));
- dt.bb.resize(100);
- detections=100;
- }
- // for (int i=0;i<detections;i++){
- // drawBox(img,grid[dt.bb[i]]);
- // }
- // imshow("detections",img);
- if (detections==0){
- detected=false;
- return;
- }
- printf("Fern detector made %d detections ",detections);
- //兩次使用getTickCount(),然後再除以getTickFrequency(),計算出來的是以秒s為機關的時間(opencv 2.0 以前是ms)
- t=(double)getTickCount()-t;
- printf("in %gms\n", t*1000/getTickFrequency()); //列印以上代碼運作使用的毫秒數
- // Initialize detection structure
- dt.patt = vector<vector<int> >(detections,vector<int>(10,0)); // Corresponding codes of the Ensemble Classifier
- dt.conf1 = vector<float>(detections); // Relative Similarity (for final nearest neighbour classifier)
- dt.conf2 =vector<float>(detections); // Conservative Similarity (for integration with tracker)
- dt.isin = vector<vector<int> >(detections,vector<int>(3,-1)); // Detected (isin=1) or rejected (isin=0) by nearest neighbour classifier
- dt.patch = vector<Mat>(detections,Mat(patch_size,patch_size,CV_32F));// Corresponding patches
- int idx;
- Scalar mean, stdev;
- float nn_th = classifier.getNNTh();
- //級聯分類器子產品三:最近鄰分類器檢測子產品
- for (int i=0;i<detections;i++){ // for every remaining detection
- idx=dt.bb[i]; // Get the detected bounding box index
- patch = frame(grid[idx]);
- getPattern(patch,dt.patch[i],mean,stdev); // Get pattern within bounding box
- //計算圖像片pattern到線上模型M的相關相似度和保守相似度
- classifier.NNConf(dt.patch[i],dt.isin[i],dt.conf1[i],dt.conf2[i]); // Evaluate nearest neighbour classifier
- dt.patt[i]=tmp.patt[idx];
- //printf("Testing feature %d, conf:%f isin:(%d|%d|%d)\n",i,dt.conf1[i],dt.isin[i][0],dt.isin[i][1],dt.isin[i][2]);
- //相關相似度大于門檻值,則認為含有前景目标
- if (dt.conf1[i]>nn_th){ // idx = dt.conf1 > tld.model.thr_nn; % get all indexes that made it through the nearest neighbour
- dbb.push_back(grid[idx]); // BB = dt.bb(:,idx); % bounding boxes
- dconf.push_back(dt.conf2[i]); // Conf = dt.conf2(:,idx); % conservative confidences
- }
- }
- //列印檢測到的可能存在目标的掃描視窗數(可以通過三個級聯檢測器的)
- if (dbb.size()>0){
- printf("Found %d NN matches\n",(int)dbb.size());
- detected=true;
- }
- else{
- printf("No NN matches found.\n");
- detected=false;
- }
- }
- //Intentionally empty: per the original notes, evaluation is done offline by the
- //python script ../datasets/evaluate_vis.py (see the README).
- void TLD::evaluate(){
- }
- //Learning step: if the current estimate (lastbox) looks consistent with the model,
- //regenerate positive/negative examples around it and retrain both classifiers.
- //  img: current frame. On every rejection path lastvalid is set to false.
- void TLD::learn(const Mat& img){
- printf("[Learning] ");
- ///Check consistency
- //Clip lastbox against the image so the ROI taken below stays inside img
- BoundingBox bb;
- bb.x = max(lastbox.x,0);
- bb.y = max(lastbox.y,0);
- bb.width = min(min(img.cols-lastbox.x,lastbox.width),min(lastbox.width,lastbox.br().x));
- bb.height = min(min(img.rows-lastbox.y,lastbox.height),min(lastbox.height,lastbox.br().y));
- Scalar mean, stdev;
- Mat pattern;
- //Resize img(bb) to patch_size x patch_size and remove its mean
- getPattern(img(bb), pattern, mean, stdev);
- vector<int> isin;
- float dummy, conf;
- //conf takes NNConf's third output (the value detect() stores as relative similarity)
- classifier.NNConf(pattern,isin,conf,dummy);
- if (conf<0.5) { //too dissimilar to the model: do not train
- printf("Fast change..not training\n");
- lastvalid =false;
- return;
- }
- if (stdev.val[0]*stdev.val[0] < var){ //patch variance below the threshold: do not train
- printf("Low variance..not training\n");
- lastvalid=false;
- return;
- }
- if(isin[2]==1){ //NNConf flagged the patch as part of the negative data: do not train
- //Message now ends with a newline so the "[Learning] " line is terminated like the others
- printf("Patch in negative data..not training\n");
- lastvalid=false;
- return;
- }
- /// Data generation
- for (size_t i=0;i<grid.size();i++){ //refresh every window's overlap with lastbox
- grid[i].overlap = bbOverlap(lastbox, grid[i]);
- }
- //Examples for the fern (ensemble) classifier
- vector<pair<vector<int>,int> > fern_examples;
- good_boxes.clear();
- bad_boxes.clear();
- //Refill good_boxes / bad_boxes (indices into grid) from the overlaps just computed
- getOverlappingBoxes(lastbox, num_closest_update);
- if (good_boxes.size()>0)
- generatePositiveData(img, num_warps_update); //fills pX and pEx
- else{
- lastvalid = false;
- printf("No good boxes..Not training\n");
- return;
- }
- fern_examples.reserve(pX.size() + bad_boxes.size());
- fern_examples.assign(pX.begin(), pX.end());
- int idx;
- for (size_t i=0;i<bad_boxes.size();i++){
- idx=bad_boxes[i];
- //tmp.conf holds the forest measure written by detect() (compared there against
- //numtrees*fern_th), so it is not limited to [0,1]; windows scoring >=1 become negatives
- if (tmp.conf[idx]>=1){
- fern_examples.push_back(make_pair(tmp.patt[idx],0));
- }
- }
- //Examples for the nearest-neighbour classifier: pEx first, then detected patches far from lastbox
- vector<Mat> nn_examples;
- nn_examples.reserve(dt.bb.size()+1);
- nn_examples.push_back(pEx);
- for (size_t i=0;i<dt.bb.size();i++){
- idx = dt.bb[i];
- if (bbOverlap(lastbox,grid[idx]) < bad_overlap)
- nn_examples.push_back(dt.patch[i]);
- }
- /// Classifiers update
- classifier.trainF(fern_examples,2);
- classifier.trainNN(nn_examples);
- classifier.show();
- }
- //Builds the sliding-window grid used by the detector.
- //For each of 21 scales of the initial box, windows are laid out over img with a step of
- //10% of the window's shorter side; each window stores its overlap with box and its scale index.
- //Appends to the members scales (one Size per accepted scale) and grid (all windows).
- void TLD::buildGrid(const cv::Mat& img, const cv::Rect& box){
- const float SHIFT = 0.1;
- //Consecutive scale factors differ by a ratio of 1.2
- const float SCALES[] = {0.16151,0.19381,0.23257,0.27908,0.33490,0.40188,0.48225,
- 0.57870,0.69444,0.83333,1,1.20000,1.44000,1.72800,
- 2.07360,2.48832,2.98598,3.58318,4.29982,5.15978,6.19174};
- const BoundingBox target(box);
- int scaleIdx = 0; //index into scales of the scale currently being laid out
- for (int s=0; s < 21; s++){
- const int w = round(box.width*SCALES[s]);
- const int h = round(box.height*SCALES[s]);
- const int shortSide = min(h,w);
- //Accept only scales at least min_win on the short side that also fit inside the image
- const bool usable = !(shortSide < min_win || w > img.cols || h > img.rows);
- if (usable){
- scales.push_back(Size(w,h));
- const int step = round(SHIFT*shortSide);
- for (int y=1; y<img.rows-h; y+=step){
- for (int x=1; x<img.cols-w; x+=step){
- BoundingBox win(Rect(x,y,w,h));
- win.overlap = bbOverlap(win, target);
- win.sidx = scaleIdx;
- grid.push_back(win);
- }
- }
- ++scaleIdx;
- }
- }
- }
- //Overlap of two boxes: area of their intersection divided by area of their union.
- //Returns 0 when the boxes are separated along either axis.
- float TLD::bbOverlap(const BoundingBox& box1, const BoundingBox& box2){
- const int right1 = box1.x + box1.width;
- const int right2 = box2.x + box2.width;
- const int bottom1 = box1.y + box1.height;
- const int bottom2 = box2.y + box2.height;
- //Disjoint if one box lies entirely past the other horizontally or vertically
- const bool disjoint = box1.x > right2 || box1.y > bottom2 || right1 < box2.x || bottom1 < box2.y;
- if (disjoint)
- return 0.0;
- const float sharedW = min(right1, right2) - max(box1.x, box2.x);
- const float sharedH = min(bottom1, bottom2) - max(box1.y, box2.y);
- const float shared = sharedW * sharedH;
- const float areaA = box1.width * box1.height;
- const float areaB = box2.width * box2.height;
- return shared / (areaA + areaB - shared);
- }
- //Classifies every grid window by its stored overlap value:
- //  overlap > 0.6         -> index appended to good_boxes
- //  overlap < bad_overlap -> index appended to bad_boxes
- //and records the window with the largest overlap in best_box. good_boxes is then cut
- //down to the num_closest highest-overlap entries and bbhull is recomputed.
- //box1 itself is not read here; the overlaps must already be stored in grid.
- void TLD::getOverlappingBoxes(const cv::Rect& box1,int num_closest){
- float top_overlap = 0;
- for (int k=0;k<grid.size();k++){
- const float ov = grid[k].overlap;
- if (ov > top_overlap) {
- top_overlap = ov;
- best_box = grid[k];
- }
- if (ov > 0.6)
- good_boxes.push_back(k);
- else if (ov < bad_overlap)
- bad_boxes.push_back(k);
- }
- //Keep only the num_closest best boxes: nth_element moves the highest-overlap
- //indices (order given by OComparator) to the front, then the tail is dropped
- if (good_boxes.size()>num_closest){
- std::nth_element(good_boxes.begin(), good_boxes.begin() + num_closest, good_boxes.end(), OComparator(grid));
- good_boxes.resize(num_closest);
- }
- //Bounding hull of the remaining good boxes
- getBBHull();
- }
- //Stores in bbhull the smallest rectangle that encloses every window listed in good_boxes.
- void TLD::getBBHull(){
- int left=INT_MAX, top=INT_MAX; //running minima of the top-left corners
- int right=0, bottom=0; //running maxima of the bottom-right corners
- for (int k=0;k<good_boxes.size();k++){
- const BoundingBox& g = grid[good_boxes[k]];
- left=min(g.x,left);
- top=min(g.y,top);
- right=max(g.x + g.width,right);
- bottom=max(g.y + g.height,bottom);
- }
- bbhull.x = left;
- bbhull.y = top;
- bbhull.width = right-left;
- bbhull.height = bottom -top;
- }
- //Predicate for clustering: true unless the overlap of the two boxes is below 0.5.
- bool bbcomp(const BoundingBox& b1,const BoundingBox& b2){
- TLD t;
- return !(t.bbOverlap(b1,b2)<0.5);
- }
- //Agglomerative clustering of boxes using distance = 1 - overlap.
- //The closest pair of boxes from different clusters is merged repeatedly until the closest
- //remaining pair is more than 0.5 apart.
- //  dbb:     boxes to cluster
- //  indexes: output, resized to dbb.size(); indexes[j] is the cluster label of dbb[j],
- //           labels are consecutive integers starting at 0
- //  returns: number of clusters (0 for empty input)
- //Fixes over the previous version: no variable-length arrays and no out-of-bounds write to
- //the unused level array; cluster ids are captured before relabelling so a merge moves
- //every member; boxes in one cluster now share one label; indexes is always filled.
- int TLD::clusterBB(const vector<BoundingBox>& dbb,vector<int>& indexes){
- const int c = dbb.size();
- indexes.assign(c, 0);
- if (c == 0)
- return 0;
- //1. Build proximity matrix (only entries with i != j are written and read)
- Mat D(c,c,CV_32F);
- for (int i=0;i<c;i++){
- for (int j=i+1;j<c;j++){
- const float d = 1-bbOverlap(dbb[i],dbb[j]);
- D.at<float>(i,j) = d;
- D.at<float>(j,i) = d;
- }
- }
- //2. Initialize disjoint clustering: every box starts in its own cluster
- vector<int> belongs(c);
- for (int i=0;i<c;i++){
- belongs[i]=i;
- }
- int m=c; //id handed to the next merged cluster
- for (int it=0;it<c-1;it++){
- //3. Find nearest pair of boxes that are still in different clusters
- float min_d = 1;
- int node_a = -1, node_b = -1;
- for (int i=0;i<c;i++){
- for (int j=i+1;j<c;j++){
- if (D.at<float>(i,j)<min_d && belongs[i]!=belongs[j]){
- min_d = D.at<float>(i,j);
- node_a = i;
- node_b = j;
- }
- }
- }
- //Stop once nothing is close enough to merge
- if (node_a < 0 || min_d>0.5)
- break;
- //4. Merge the two clusters; ids are read first because belongs[] changes inside the loop
- const int id_a = belongs[node_a];
- const int id_b = belongs[node_b];
- for (int k=0;k<c;k++){
- if (belongs[k]==id_a || belongs[k]==id_b)
- belongs[k]=m;
- }
- m++;
- }
- //5. Map the surviving cluster ids (all < m) to consecutive labels in order of first appearance
- vector<int> label(m, -1);
- int count = 0;
- for (int j=0;j<c;j++){
- int& l = label[belongs[j]];
- if (l < 0)
- l = count++;
- indexes[j] = l;
- }
- return count;
- }
- //Groups the detector's boxes into clusters and reduces each cluster to one representative.
- //  dbb, dconf:  detected boxes and their confidences (same length)
- //  cbb, cconf:  output; per cluster, the mean box (x, y, width, height averaged and rounded)
- //               and the mean confidence
- //Zero boxes give empty outputs, one box is returned as is, two boxes are split when
- //1 - overlap exceeds space_thr, and larger sets are labelled by cv::partition with bbcomp.
- //The averages are now computed in floating point before cvRound; previously the integer
- //division truncated first, so the rounding had no effect.
- void TLD::clusterConf(const vector<BoundingBox>& dbb,const vector<float>& dconf,vector<BoundingBox>& cbb,vector<float>& cconf){
- int numbb =dbb.size();
- vector<int> T; //cluster label of each box
- float space_thr = 0.5;
- int c=1; //number of clusters
- switch (numbb){
- case 0: //nothing to cluster; skip partition on empty input
- c=0;
- break;
- case 1: //a single detection is its own cluster
- cbb=vector<BoundingBox>(1,dbb[0]);
- cconf=vector<float>(1,dconf[0]);
- return;
- case 2:
- T =vector<int>(2,0);
- if (1 - bbOverlap(dbb[0],dbb[1]) > space_thr){ //too far apart: two separate clusters
- T[1]=1;
- c=2;
- }
- break;
- default:
- T = vector<int>(numbb, 0);
- //cv::partition writes a label per box into T and returns the number of classes;
- //bbcomp treats two boxes as equivalent unless their overlap is below 0.5
- c = partition(dbb, T, (*bbcomp));
- //c = clusterBB(dbb,T);
- break;
- }
- cconf=vector<float>(c);
- cbb=vector<BoundingBox>(c);
- printf("Cluster indexes: ");
- BoundingBox bx;
- for (int i=0;i<c;i++){ //for every cluster
- float cnf=0;
- int N=0,mx=0,my=0,mw=0,mh=0;
- for (size_t j=0;j<T.size();j++){ //accumulate the members of cluster i
- if (T[j]==i){
- printf("%d ",i);
- cnf=cnf+dconf[j];
- mx=mx+dbb[j].x;
- my=my+dbb[j].y;
- mw=mw+dbb[j].width;
- mh=mh+dbb[j].height;
- N++;
- }
- }
- if (N>0){ //the mean box and mean confidence represent the cluster
- cconf[i]=cnf/N;
- bx.x=cvRound((float)mx/N);
- bx.y=cvRound((float)my/N);
- bx.width=cvRound((float)mw/N);
- bx.height=cvRound((float)mh/N);
- cbb[i]=bx;
- }
- }
- printf("\n");
- }
/*
* TLD.cpp
*
* Created on: Jun 9, 2011
* Author: alantrrs
*/
#include <TLD.h>
#include <stdio.h>
using namespace cv;
using namespace std;
//Default constructor: sets nothing itself; parameters are loaded by calling read().
TLD::TLD()
{
}
//Constructs the object and loads its parameters from the given file node via read().
TLD::TLD(const FileNode& file){
read(file);
}
//Loads the tracker parameters from a file node and then lets the classifier read its
//own settings from the same node.
void TLD::read(const FileNode& file){
  ///Bounding Box Parameters
  min_win = static_cast<int>(file["min_win"]);
  ///Generator Parameters
  patch_size = static_cast<int>(file["patch_size"]);
  //positive examples generated at initialisation
  num_closest_init = static_cast<int>(file["num_closest_init"]);
  num_warps_init = static_cast<int>(file["num_warps_init"]);
  noise_init = static_cast<int>(file["noise_init"]);
  angle_init = static_cast<float>(file["angle_init"]);
  shift_init = static_cast<float>(file["shift_init"]);
  scale_init = static_cast<float>(file["scale_init"]);
  //positive examples generated on update
  num_closest_update = static_cast<int>(file["num_closest_update"]);
  num_warps_update = static_cast<int>(file["num_warps_update"]);
  noise_update = static_cast<int>(file["noise_update"]);
  angle_update = static_cast<float>(file["angle_update"]);
  shift_update = static_cast<float>(file["shift_update"]);
  scale_update = static_cast<float>(file["scale_update"]);
  //negative examples
  bad_overlap = static_cast<float>(file["overlap"]);
  bad_patches = static_cast<int>(file["num_patches"]);
  classifier.read(file);
}
//此函數完成準備工作
void TLD::init(const Mat& frame1, const Rect& box, FILE* bb_file){
//bb_file = fopen("bounding_boxes.txt","w");
//Get Bounding Boxes
//此函數根據傳入的box(目标邊界框)在傳入的圖像frame1中建構全部的掃描視窗,并計算重疊度
buildGrid(frame1, box);
printf("Created %d bounding boxes\n",(int)grid.size()); //vector的成員size()用于擷取向量元素的個數
///Preparation
//allocation
//積分圖像,用以計算2bitBP特征(類似于haar特征的計算)
//Mat的建立,方式有兩種:1.調用create(行,列,類型)2.Mat(行,列,類型(值))。
iisum.create(frame1.rows+1, frame1.cols+1, CV_32F);
iisqsum.create(frame1.rows+1, frame1.cols+1, CV_64F);
//Detector data中定義:std::vector<float> dconf; 檢測确信度??
//vector 的reserve增加了vector的capacity,但是它的size沒有改變!而resize改變了vector
//的capacity同時也增加了它的size!reserve是容器預留白間,但在空間内不真正建立元素對象,
//是以在沒有添加新的對象之前,不能引用容器内的元素。
//不管是調用resize還是reserve,二者對容器原有的元素都沒有影響。
//myVec.reserve( 100 ); // 新元素還沒有構造, 此時不能用[]通路元素
//myVec.resize( 100 ); // 用元素的預設構造函數構造了100個新的元素,可以直接操作新元素
dconf.reserve(100);
dbb.reserve(100);
bbox_step =7;
//以下在Detector data中定義的容器都給其配置設定grid.size()大小(這個是一幅圖像中全部的掃描視窗個數)的容量
//Detector data中定義TempStruct tmp;
//tmp.conf.reserve(grid.size());
tmp.conf = vector<float>(grid.size());
tmp.patt = vector<vector<int> >(grid.size(), vector<int>(10,0));
//tmp.patt.reserve(grid.size());
dt.bb.reserve(grid.size());
good_boxes.reserve(grid.size());
bad_boxes.reserve(grid.size());
//TLD中定義:cv::Mat pEx; //positive NN example 大小為15*15圖像片
pEx.create(patch_size, patch_size, CV_64F);
//Init Generator
//TLD中定義:cv::PatchGenerator generator; //PatchGenerator類用來對圖像區域進行仿射變換
/*
cv::PatchGenerator::PatchGenerator (
double _backgroundMin,
double _backgroundMax,
double _noiseRange,
bool _randomBlur = true,
double _lambdaMin = 0.6,
double _lambdaMax = 1.5,
double _thetaMin = -CV_PI,
double _thetaMax = CV_PI,
double _phiMin = -CV_PI,
double _phiMax = CV_PI
)
一般的用法是先初始化一個PatchGenerator的執行個體,然後RNG一個随機因子,再調用()運算符産生一個變換後的正樣本。
*/
generator = PatchGenerator (0,0,noise_init,true,1-scale_init,1+scale_init,-angle_init*CV_PI/180,
angle_init*CV_PI/180,-angle_init*CV_PI/180,angle_init*CV_PI/180);
//此函數根據傳入的box(目标邊界框),在整幀圖像中的全部視窗中尋找與該box距離最小(即最相似,
//重疊度最大)的num_closest_init個視窗,然後把這些視窗 歸入good_boxes容器
//同時,把重疊度小于0.2的,歸入 bad_boxes 容器
//首先根據overlap的比例資訊選出重複區域比例大于60%并且前num_closet_init= 10個的最接近box的RectBox,
//相當于對RectBox進行篩選。并通過BBhull函數得到這些RectBox的最大邊界。
getOverlappingBoxes(box, num_closest_init);
printf("Found %d good boxes, %d bad boxes\n",(int)good_boxes.size(),(int)bad_boxes.size());
printf("Best Box: %d %d %d %d\n",best_box.x, best_box.y, best_box.width, best_box.height);
printf("Bounding box hull: %d %d %d %d\n", bbhull.x, bbhull.y, bbhull.width, bbhull.height);
//Correct Bounding Box
lastbox=best_box;
lastconf=1;
lastvalid=true;
//Print
fprintf(bb_file,"%d,%d,%d,%d,%f\n",lastbox.x,lastbox.y,lastbox.br().x,lastbox.br().y,lastconf);
//Prepare Classifier 準備分類器
//scales容器裡是所有掃描視窗的尺度,由buildGrid()函數初始化
classifier.prepare(scales);
///Generate Data
// Generate positive data
generatePositiveData(frame1, num_warps_init);
// Set variance threshold
Scalar stdev, mean;
//統計best_box的均值和标準差
例如需要提取圖像A的某個ROI(感興趣區域,由矩形框)的話,用Mat類的B=img(ROI)即可提取
//frame1(best_box)就表示在frame1中提取best_box區域(目标區域)的圖像片
meanStdDev(frame1(best_box), mean, stdev);
//利用積分圖像去計算每個待檢測視窗的方差
//cvIntegral( const CvArr* image, CvArr* sum, CvArr* sqsum=NULL, CvArr* tilted_sum=NULL );
//計算積分圖像,輸入圖像,sum積分圖像, W+1×H+1,sqsum對象素值平方的積分圖像,tilted_sum旋轉45度的積分圖像
//利用積分圖像,可以計算在某象素的上-右方的或者旋轉的矩形區域中進行求和、求均值以及标準方差的計算,
//并且保證運算的複雜度為O(1)。
integral(frame1, iisum, iisqsum);
//級聯分類器子產品一:方差檢測子產品,利用積分圖計算每個待檢測視窗的方差,方差大于var門檻值(目标patch方差的50%)的,
//則認為其含有前景目标方差;var 為标準差的平方
var = pow(stdev.val[0],2) * 0.5; //getVar(best_box,iisum,iisqsum);
cout << "variance: " << var << endl;
//check variance
//getVar函數通過積分圖像計算輸入的best_box的方差
double vr = getVar(best_box, iisum, iisqsum)*0.5;
cout << "check variance: " << vr << endl;
// Generate negative data
generateNegativeData(frame1);
//Split Negative Ferns into Training and Testing sets (they are already shuffled)
//将負樣本放進 訓練和測試集
int half = (int)nX.size()*0.5f;
//vector::assign函數将區間[start, end)中的值指派給目前的vector.
//将一半的負樣本集 作為 測試集
nXT.assign(nX.begin()+half, nX.end()); //nXT; //negative data to Test
//然後将剩下的一半作為訓練集
nX.resize(half);
///Split Negative NN Examples into Training and Testing sets
half = (int)nEx.size()*0.5f;
nExT.assign(nEx.begin()+half,nEx.end());
nEx.resize(half);
//Merge Negative Data with Positive Data and shuffle it
//将負樣本和正樣本合并,然後打亂
vector<pair<vector<int>,int> > ferns_data(nX.size()+pX.size());
vector<int> idx = index_shuffle(0, ferns_data.size());
int a=0;
for (int i=0;i<pX.size();i++){
ferns_data[idx[a]] = pX[i];
a++;
}
for (int i=0;i<nX.size();i++){
ferns_data[idx[a]] = nX[i];
a++;
}
//Data already have been shuffled, just putting it in the same vector
vector<cv::Mat> nn_data(nEx.size()+1);
nn_data[0] = pEx;
for (int i=0;i<nEx.size();i++){
nn_data[i+1]= nEx[i];
}
///Training
//訓練 集合分類器(森林) 和 最近鄰分類器
classifier.trainF(ferns_data, 2); //bootstrap = 2
classifier.trainNN(nn_data);
///Threshold Evaluation on testing sets
//用樣本在上面得到的 集合分類器(森林) 和 最近鄰分類器 中分類,評價得到最好的門檻值
classifier.evaluateTh(nXT, nExT);
}
/* Generate Positive data
* Inputs:
* - good_boxes (bbP)
* - best_box (bbP0)
* - frame (im0)
* - num_warps: number of passes over good_boxes (the first pass uses the unwarped image)
* Outputs:
* - Positive fern features (pX)
* - Positive NN examples (pEx)
*/
void TLD::generatePositiveData(const Mat& frame, int num_warps){
//Scalar holds up to four doubles; for a single-channel image only val[0] is meaningful
Scalar mean; //mean of the pattern
Scalar stdev; //standard deviation of the pattern
//Store the resized, mean-removed patch under best_box in pEx
getPattern(frame(best_box), pEx, mean, stdev);
//Get Fern features on warped patches
Mat img;
Mat warped;
//img is the frame blurred with a 9x9 Gaussian kernel (sigma 1.5)
GaussianBlur(frame, img, Size(9,9), 1.5);
//warped is a view of the bbhull region of img (Mat::operator()(Rect) shares the pixel
//data), so what the generator writes into warped below also shows up in img
warped = img(bbhull);
RNG& rng = theRNG(); //OpenCV's default random number generator
Point2f pt(bbhull.x + (bbhull.width-1)*0.5f, bbhull.y+(bbhull.height-1)*0.5f); //centre of bbhull
//One feature code per fern structure of the classifier
vector<int> fern(classifier.getNumStructs());
pX.clear();
Mat patch;
//pX receives num_warps * good_boxes.size() examples; reserve that much up front
if (pX.capacity() < num_warps * good_boxes.size())
pX.reserve(num_warps * good_boxes.size());
int idx;
for (int i=0; i< num_warps; i++){
//From the second pass on, the generator writes a randomly transformed rendering of the
//bbhull area of frame into warped
if (i>0)
generator(frame, pt, warped, bbhull.size(), rng);
for (int b=0; b < good_boxes.size(); b++){
idx = good_boxes[b]; //good_boxes stores indices into grid
patch = img(grid[idx]); //image patch under this high-overlap window
classifier.getFeatures(patch, grid[idx].sidx, fern); //fern codes at the window's scale index
pX.push_back(make_pair(fern, 1)); //positive ferns <features, labels=1>
}
}
printf("Positive examples generated: ferns:%d NN:1\n",(int)pX.size());
}
//先對最接近box的RectBox區域得到其patch ,然後将像素資訊轉換為Pattern,
//具體的說就是歸一化RectBox對應的patch的size(放縮至patch_size = 15*15),将2維的矩陣變成一維的向量資訊,
//然後将向量資訊均值設為0,調整為zero mean and unit variance(ZMUV)
//Output: resized Zero-Mean patch
void TLD::getPattern(const Mat& img, Mat& pattern, Scalar& mean, Scalar& stdev){
//将img放縮至patch_size = 15*15,存到pattern中
resize(img, pattern, Size(patch_size, patch_size));
//計算pattern這個矩陣的均值和标準差
//Computes a mean value and a standard deviation of matrix elements.
meanStdDev(pattern, mean, stdev);
pattern.convertTo(pattern, CV_32F);
//opencv中Mat的運算符有重載, Mat可以 + Mat; + Scalar; + int / float / double 都可以
//将矩陣所有元素減去其均值,也就是把patch的均值設為零
pattern = pattern - mean.val[0];
}
/* Inputs:
* - Image
* - bad_boxes (Boxes far from the bounding box)
* - variance (pEx variance)
* Outputs
* - Negative fern features (nX)
* - Negative NN examples (nEx)
*/
void TLD::generateNegativeData(const Mat& frame){
//由于之前重疊度小于0.2的,都歸入 bad_boxes了,是以數量挺多,下面的函數用于打亂順序,也就是為了
//後面随機選擇bad_boxes
random_shuffle(bad_boxes.begin(), bad_boxes.end());//Random shuffle bad_boxes indexes
int idx;
//Get Fern Features of the boxes with big variance (calculated using integral images)
int a=0;
//int num = std::min((int)bad_boxes.size(),(int)bad_patches*100); //limits the size of bad_boxes to try
printf("negative data generation started.\n");
vector<int> fern(classifier.getNumStructs());
nX.reserve(bad_boxes.size());
Mat patch;
for (int j=0;j<bad_boxes.size();j++){ //把方差較大的bad_boxes加入負樣本
idx = bad_boxes[j];
if (getVar(grid[idx],iisum,iisqsum)<var*0.5f)
continue;
patch = frame(grid[idx]);
classifier.getFeatures(patch, grid[idx].sidx, fern);
nX.push_back(make_pair(fern, 0)); //得到負樣本
a++;
}
printf("Negative examples generated: ferns: %d ", a);
//random_shuffle(bad_boxes.begin(),bad_boxes.begin()+bad_patches);//Randomly selects 'bad_patches' and get the patterns for NN;
Scalar dum1, dum2;
//bad_patches = (int)file["num_patches"]; 在參數檔案中 num_patches = 100
nEx=vector<Mat>(bad_patches);
for (int i=0;i<bad_patches;i++){
idx=bad_boxes[i];
patch = frame(grid[idx]);
//具體的說就是歸一化RectBox對應的patch的size(放縮至patch_size = 15*15)
//由于負樣本不需要均值和方差,是以就定義dum,将其舍棄
getPattern(patch,nEx[i],dum1,dum2);
}
printf("NN: %d\n",(int)nEx.size());
}
//該函數通過積分圖像計算輸入的box的方差
double TLD::getVar(const BoundingBox& box, const Mat& sum, const Mat& sqsum){
double brs = sum.at<int>(box.y+box.height, box.x+box.width);
double bls = sum.at<int>(box.y+box.height, box.x);
double trs = sum.at<int>(box.y,box.x + box.width);
double tls = sum.at<int>(box.y,box.x);
double brsq = sqsum.at<double>(box.y+box.height,box.x+box.width);
double blsq = sqsum.at<double>(box.y+box.height,box.x);
double trsq = sqsum.at<double>(box.y,box.x+box.width);
double tlsq = sqsum.at<double>(box.y,box.x);
double mean = (brs+tls-trs-bls)/((double)box.area());
double sqmean = (brsq+tlsq-trsq-blsq)/((double)box.area());
//方差=E(X^2)-(EX)^2 EX表示均值
return sqmean-mean*mean;
}
//Processes one new frame: track, detect, integrate both results, log the box, then learn.
//  img1, img2:       previous and current frame
//  points1, points2: point sets passed through to track()
//  bbnext:           output, the box chosen for the current frame
//  lastboxfound:     in/out, whether a box was available / is available after this frame
//  tl:               enables tracking and learning when true
//  bb_file:          open file receiving one line per frame ("NaN,..." when no box is found)
void TLD::processFrame(const cv::Mat& img1,const cv::Mat& img2,vector<Point2f>& points1,vector<Point2f>& points2,BoundingBox& bbnext, bool& lastboxfound, bool tl, FILE* bb_file){
vector<BoundingBox> cbb;
vector<float> cconf;
int confident_detections=0;
int didx; //detection index; only read when exactly one confident cluster was found
///Track
if(lastboxfound && tl){ //tl: train and learn
//Run the tracker from the previous box
track(img1, img2, points1, points2);
}
else{
tracked = false;
}
///Detect
detect(img2);
///Integration
//Combine the single tracker result with the (possibly many) detector results into one box
if (tracked){
bbnext=tbb;
lastconf=tconf; //confidence computed by track()
lastvalid=tvalid; //validity flag computed by track()
printf("Tracked\n");
if(detected){ // if Detected
//Cluster the detections into representative boxes
clusterConf(dbb, dconf, cbb, cconf); // cluster detections
printf("Found %d clusters\n",(int)cbb.size());
for (int i=0;i<cbb.size();i++){
//Count clusters that overlap the tracker box by less than 0.5 and beat its confidence
if (bbOverlap(tbb, cbb[i])<0.5 && cconf[i]>tconf){ // Get index of a clusters that is far from tracker and are more confident than the tracker
confident_detections++; //number of such clusters
didx=i; //detection index
}
}
//Exactly one such cluster: take it instead of the tracker box and mark the result not valid for learning
if (confident_detections==1){ //if there is ONE such a cluster, re-initialize the tracker
printf("Found a better match..reinitializing tracking\n");
bbnext=cbb[didx];
lastconf=cconf[didx];
lastvalid=false;
}
else {
printf("%d confident cluster was found\n", confident_detections);
int cx=0,cy=0,cw=0,ch=0;
int close_detections=0;
for (int i=0;i<dbb.size();i++){
//Sum position and size of detections overlapping the tracker box by more than 0.7
if(bbOverlap(tbb,dbb[i])>0.7){ // Get mean of close detections
cx += dbb[i].x;
cy +=dbb[i].y;
cw += dbb[i].width;
ch += dbb[i].height;
close_detections++; //number of close detections
printf("weighted detection: %d %d %d %d\n",dbb[i].x,dbb[i].y,dbb[i].width,dbb[i].height);
}
}
if (close_detections>0){
//Weighted average of the tracker box (weight 10) and the close detections (weight 1 each)
bbnext.x = cvRound((float)(10*tbb.x+cx)/(float)(10+close_detections)); // weighted average trackers trajectory with the close detections
bbnext.y = cvRound((float)(10*tbb.y+cy)/(float)(10+close_detections));
bbnext.width = cvRound((float)(10*tbb.width+cw)/(float)(10+close_detections));
bbnext.height = cvRound((float)(10*tbb.height+ch)/(float)(10+close_detections));
printf("Tracker bb: %d %d %d %d\n",tbb.x,tbb.y,tbb.width,tbb.height);
printf("Average bb: %d %d %d %d\n",bbnext.x,bbnext.y,bbnext.width,bbnext.height);
printf("Weighting %d close detection(s) with tracker..\n",close_detections);
}
else{
printf("%d close detections were found\n",close_detections);
}
}
}
}
else{ // If NOT tracking
printf("Not tracking..\n");
lastboxfound = false;
lastvalid = false;
//Without a tracker result the detector is used only when its detections form a single
//cluster; that cluster becomes the new box. Otherwise bbnext keeps the caller's value.
if(detected){ // and detector is defined
clusterConf(dbb,dconf,cbb,cconf); // cluster detections
printf("Found %d clusters\n",(int)cbb.size());
if (cconf.size()==1){
bbnext=cbb[0];
lastconf=cconf[0];
printf("Confident detection..reinitializing tracker\n");
lastboxfound = true;
}
}
}
lastbox=bbnext;
if (lastboxfound)
fprintf(bb_file,"%d,%d,%d,%d,%f\n",lastbox.x,lastbox.y,lastbox.br().x,lastbox.br().y,lastconf);
else
fprintf(bb_file,"NaN,NaN,NaN,NaN,NaN\n");
///learn
if (lastvalid && tl)
learn(img2);
}
/*Inputs:
* -current frame(img2), last frame(img1), last Bbox(bbox_f[0]).
*Outputs:
*- Confidence(tconf), Predicted bounding box(tbb), Validity(tvalid), points2 (for display purposes only)
* The member flag tracked reports whether a usable prediction was produced.
*/
void TLD::track(const Mat& img1, const Mat& img2, vector<Point2f>& points1, vector<Point2f>& points2){
//Generate points: a regular grid of points inside lastbox, appended to points1
bbPoints(points1, lastbox);
if (points1.size()<1){
printf("BB= %d %d %d %d, Points not generated\n",lastbox.x,lastbox.y,lastbox.width,lastbox.height);
tvalid=false;
tracked=false;
return;
}
vector<Point2f> points = points1;
//Frame-to-frame tracking with forward-backward error cheking
//NOTE(review): per the original notes, trackf2f keeps only the better-tracked part of the
//points (by FB error and similarity medians); it is defined in LKTracker - confirm there
tracked = tracker.trackf2f(img1, img2, points, points2);
if (tracked){
//Bounding box prediction from the surviving point pairs
bbPredict(points, points2, lastbox, tbb);
//Failure test: reject when getFB() exceeds 10 or when the predicted box lies outside the image
if (tracker.getFB()>10 || tbb.x>img2.cols || tbb.y>img2.rows || tbb.br().x < 1 || tbb.br().y <1){
tvalid =false; //too unstable prediction or bounding box out of image
tracked = false;
printf("Too unstable predictions FB error=%f\n", tracker.getFB());
return;
}
//Estimate Confidence and Validity
Mat pattern;
Scalar mean, stdev;
//Clip tbb against the image before taking the ROI
BoundingBox bb;
bb.x = max(tbb.x,0);
bb.y = max(tbb.y,0);
bb.width = min(min(img2.cols-tbb.x,tbb.width), min(tbb.width, tbb.br().x));
bb.height = min(min(img2.rows-tbb.y,tbb.height),min(tbb.height,tbb.br().y));
//Resize img2(bb) to patch_size x patch_size and remove its mean
getPattern(img2(bb),pattern,mean,stdev);
vector<int> isin;
float dummy;
//tconf takes NNConf's fourth output
classifier.NNConf(pattern,isin,dummy,tconf); //Conservative Similarity
//Validity carries over from the previous frame and becomes true when tconf exceeds the threshold
tvalid = lastvalid;
if (tconf>classifier.thr_nn_valid){
tvalid =true;
}
}
else
printf("No points tracked\n");
}
//Appends a regular grid of points covering bb to points, at most max_pts (10) per axis.
//  points: output, points are appended (existing content is kept)
//  bb:     box to sample
//The step is now the ceiling of a floating-point division, at least 1. Before, the integer
//division ran first, so the ceil had no effect: boxes narrower or lower than max_pts got a
//step of 0 (the loop never ended) and other boxes could yield more than max_pts points per axis.
void TLD::bbPoints(vector<cv::Point2f>& points, const BoundingBox& bb){
  const int max_pts=10;
  const int margin_h=0; //horizontal sampling margin
  const int margin_v=0; //vertical sampling margin
  const int stepx = max(1, (int)ceil((bb.width-2*margin_h)/(float)max_pts));
  const int stepy = max(1, (int)ceil((bb.height-2*margin_v)/(float)max_pts));
  for (int y=bb.y+margin_v; y<bb.y+bb.height-margin_v; y+=stepy){
    for (int x=bb.x+margin_h;x<bb.x+bb.width-margin_h;x+=stepx){
      points.push_back(Point2f(x,y));
    }
  }
}
//利用剩下的這不到一半的跟蹤點輸入來預測bounding box在目前幀的位置和大小
void TLD::bbPredict(const vector<cv::Point2f>& points1,const vector<cv::Point2f>& points2,
const BoundingBox& bb1,BoundingBox& bb2) {
int npoints = (int)points1.size();
vector<float> xoff(npoints); //位移
vector<float> yoff(npoints);
printf("tracked points : %d\n", npoints);
for (int i=0;i<npoints;i++){ //計算每個特征點在兩幀之間的位移
xoff[i]=points2[i].x - points1[i].x;
yoff[i]=points2[i].y - points1[i].y;
}
float dx = median(xoff); //計算位移的中值
float dy = median(yoff);
float s;
//計算bounding box尺度scale的變化:通過計算 目前特征點互相間的距離 與 先前(上一幀)特征點互相間的距離 的
//比值,以比值的中值作為尺度的變化因子
if (npoints>1){
vector<float> d;
d.reserve(npoints*(npoints-1)/2); //等差數列求和:1+2+...+(npoints-1)
for (int i=0;i<npoints;i++){
for (int j=i+1;j<npoints;j++){
//計算 目前特征點互相間的距離 與 先前(上一幀)特征點互相間的距離 的比值(位移用絕對值)
d.push_back(norm(points2[i]-points2[j])/norm(points1[i]-points1[j]));
}
}
s = median(d);
}
else {
s = 1.0;
}
float s1 = 0.5*(s-1)*bb1.width;
float s2 = 0.5*(s-1)*bb1.height;
printf("s= %f s1= %f s2= %f \n", s, s1, s2);
//得到目前bounding box的位置與大小資訊
//目前box的x坐标 = 前一幀box的x坐标 + 全部特征點位移的中值(可了解為box移動近似的位移) - 目前box寬的一半
bb2.x = round( bb1.x + dx - s1);
bb2.y = round( bb1.y + dy -s2);
bb2.width = round(bb1.width*s);
bb2.height = round(bb1.height*s);
printf("predicted bb: %d %d %d %d\n",bb2.x,bb2.y,bb2.br().x,bb2.br().y);
}
//Runs the three-stage cascade detector over every scanning window of frame.
//  Stage 1: variance filter (getVar against var)
//  Stage 2: fern classifier (measure_forest against numtrees*fern_th), best 100 kept
//  Stage 3: nearest-neighbour classifier (first NNConf similarity against getNNTh())
//Results: dbb/dconf hold the accepted boxes and their second NNConf similarity, dt and tmp
//keep the intermediate data that learn() reads, and detected tells whether any box survived.
void TLD::detect(const cv::Mat& frame){
//cleaning
dbb.clear();
dconf.clear();
dt.bb.clear();
//Start of the timing measurement (tick count)
double t = (double)getTickCount();
Mat img(frame.rows, frame.cols, CV_8U);
integral(frame,iisum,iisqsum); //integral and squared integral image of the frame
GaussianBlur(frame,img,Size(9,9),1.5); //blurred copy used for the fern features
int numtrees = classifier.getNumStructs();
float fern_th = classifier.getFernTh(); //threshold of the fern classifier
vector <int> ferns(10);
float conf;
int a=0;
Mat patch;
//Stage 1: variance filter; only windows with variance of at least var go on
for (int i=0; i<grid.size(); i++){ //FIXME: BottleNeck
if (getVar(grid[i],iisum,iisqsum) >= var){ //variance of this window from the integral images
a++;
//Stage 2: fern (ensemble) classifier
patch = img(grid[i]);
classifier.getFeatures(patch,grid[i].sidx,ferns); //fern codes of the patch
conf = classifier.measure_forest(ferns); //forest measure for these codes
tmp.conf[i]=conf; //kept per window for learn()
tmp.patt[i]=ferns;
//Accept the window when the measure exceeds numtrees*fern_th
if (conf > numtrees*fern_th){
dt.bb.push_back(i); //index of a window that passed both stages
}
}
else
tmp.conf[i]=0.0;
}
int detections = dt.bb.size();
printf("%d Bounding boxes passed the variance filter\n",a);
printf("%d Initial detection from Fern Classifier\n", detections);
//More than 100 candidates: keep the 100 with the highest tmp.conf
if (detections>100){ //CComparator orders indices by descending tmp.conf
nth_element(dt.bb.begin(), dt.bb.begin()+100, dt.bb.end(), CComparator(tmp.conf));
dt.bb.resize(100);
detections=100;
}
// for (int i=0;i<detections;i++){
// drawBox(img,grid[dt.bb[i]]);
// }
// imshow("detections",img);
if (detections==0){
detected=false;
return;
}
printf("Fern detector made %d detections ",detections);
//Elapsed ticks; dividing by getTickFrequency() gives seconds
t=(double)getTickCount()-t;
printf("in %gms\n", t*1000/getTickFrequency()); //elapsed time in milliseconds
// Initialize detection structure
dt.patt = vector<vector<int> >(detections,vector<int>(10,0)); // Corresponding codes of the Ensemble Classifier
dt.conf1 = vector<float>(detections); // Relative Similarity (for final nearest neighbour classifier)
dt.conf2 =vector<float>(detections); // Conservative Similarity (for integration with tracker)
dt.isin = vector<vector<int> >(detections,vector<int>(3,-1)); // Detected (isin=1) or rejected (isin=0) by nearest neighbour classifier
dt.patch = vector<Mat>(detections,Mat(patch_size,patch_size,CV_32F));// Corresponding patches
int idx;
Scalar mean, stdev;
float nn_th = classifier.getNNTh();
//Stage 3: nearest-neighbour classifier
for (int i=0;i<detections;i++){ // for every remaining detection
idx=dt.bb[i]; // Get the detected bounding box index
patch = frame(grid[idx]);
getPattern(patch,dt.patch[i],mean,stdev); // Get pattern within bounding box
//Similarities of the pattern to the model
classifier.NNConf(dt.patch[i],dt.isin[i],dt.conf1[i],dt.conf2[i]); // Evaluate nearest neighbour classifier
dt.patt[i]=tmp.patt[idx];
//printf("Testing feature %d, conf:%f isin:(%d|%d|%d)\n",i,dt.conf1[i],dt.isin[i][0],dt.isin[i][1],dt.isin[i][2]);
//Accept when the relative similarity exceeds the threshold
if (dt.conf1[i]>nn_th){ // idx = dt.conf1 > tld.model.thr_nn; % get all indexes that made it through the nearest neighbour
dbb.push_back(grid[idx]); // BB = dt.bb(:,idx); % bounding boxes
dconf.push_back(dt.conf2[i]); // Conf = dt.conf2(:,idx); % conservative confidences
}
}
//Report how many windows passed all three stages
if (dbb.size()>0){
printf("Found %d NN matches\n",(int)dbb.size());
detected=true;
}
else{
printf("No NN matches found.\n");
detected=false;
}
}
//Intentionally empty: per the original notes, evaluation is done offline by the
//python script ../datasets/evaluate_vis.py (see the README).
void TLD::evaluate(){
}
//Learning step: if the current estimate (lastbox) looks consistent with the model,
//regenerate positive/negative examples around it and retrain both classifiers.
//  img: current frame. On every rejection path lastvalid is set to false.
void TLD::learn(const Mat& img){
  printf("[Learning] ");
  ///Check consistency
  //Clip lastbox against the image so the ROI taken below stays inside img
  BoundingBox bb;
  bb.x = max(lastbox.x,0);
  bb.y = max(lastbox.y,0);
  bb.width = min(min(img.cols-lastbox.x,lastbox.width),min(lastbox.width,lastbox.br().x));
  bb.height = min(min(img.rows-lastbox.y,lastbox.height),min(lastbox.height,lastbox.br().y));
  Scalar mean, stdev;
  Mat pattern;
  //Resize img(bb) to patch_size x patch_size and remove its mean
  getPattern(img(bb), pattern, mean, stdev);
  vector<int> isin;
  float dummy, conf;
  //conf takes NNConf's third output (the value detect() stores as relative similarity)
  classifier.NNConf(pattern,isin,conf,dummy);
  if (conf<0.5) { //too dissimilar to the model: do not train
    printf("Fast change..not training\n");
    lastvalid =false;
    return;
  }
  if (stdev.val[0]*stdev.val[0] < var){ //patch variance below the threshold: do not train
    printf("Low variance..not training\n");
    lastvalid=false;
    return;
  }
  if(isin[2]==1){ //NNConf flagged the patch as part of the negative data: do not train
    //Message now ends with a newline so the "[Learning] " line is terminated like the others
    printf("Patch in negative data..not training\n");
    lastvalid=false;
    return;
  }
  /// Data generation
  for (size_t i=0;i<grid.size();i++){ //refresh every window's overlap with lastbox
    grid[i].overlap = bbOverlap(lastbox, grid[i]);
  }
  //Examples for the fern (ensemble) classifier
  vector<pair<vector<int>,int> > fern_examples;
  good_boxes.clear();
  bad_boxes.clear();
  //Refill good_boxes / bad_boxes (indices into grid) from the overlaps just computed
  getOverlappingBoxes(lastbox, num_closest_update);
  if (good_boxes.size()>0)
    generatePositiveData(img, num_warps_update); //fills pX and pEx
  else{
    lastvalid = false;
    printf("No good boxes..Not training\n");
    return;
  }
  fern_examples.reserve(pX.size() + bad_boxes.size());
  fern_examples.assign(pX.begin(), pX.end());
  int idx;
  for (size_t i=0;i<bad_boxes.size();i++){
    idx=bad_boxes[i];
    //tmp.conf holds the forest measure written by detect() (compared there against
    //numtrees*fern_th), so it is not limited to [0,1]; windows scoring >=1 become negatives
    if (tmp.conf[idx]>=1){
      fern_examples.push_back(make_pair(tmp.patt[idx],0));
    }
  }
  //Examples for the nearest-neighbour classifier: pEx first, then detected patches far from lastbox
  vector<Mat> nn_examples;
  nn_examples.reserve(dt.bb.size()+1);
  nn_examples.push_back(pEx);
  for (size_t i=0;i<dt.bb.size();i++){
    idx = dt.bb[i];
    if (bbOverlap(lastbox,grid[idx]) < bad_overlap)
      nn_examples.push_back(dt.patch[i]);
  }
  /// Classifiers update
  classifier.trainF(fern_examples,2);
  classifier.trainNN(nn_examples);
  classifier.show();
}
//檢測器采用掃描視窗的政策
//此函數根據傳入的box(目标邊界框)在傳入的圖像中建構全部的掃描視窗,并計算每個視窗與box的重疊度
// Builds the detector's scanning-window grid for the given image.
//
// img: frame whose size bounds the windows.
// box: initial target box; every window is a scaled copy of its size and
//      stores its overlap with this box.
//
// For each accepted scale the window size is appended to `scales`, and every
// window position at that scale is appended to `grid` with its overlap and
// its scale index (sidx = position of the scale inside `scales` as counted
// from this call). Neither container is cleared here.
void TLD::buildGrid(const cv::Mat& img, const cv::Rect& box){
  const float SHIFT = 0.1; // window step = 10% of the shorter window side
  // Successive powers of 1.2 centred on 1 (0.16151*1.2 = 0.19381, ...).
  const float SCALES[] = {0.16151,0.19381,0.23257,0.27908,0.33490,0.40188,0.48225,
                          0.57870,0.69444,0.83333,1,1.20000,1.44000,1.72800,
                          2.07360,2.48832,2.98598,3.58318,4.29982,5.15978,6.19174};
  const int num_scales = sizeof(SCALES)/sizeof(SCALES[0]);
  // Converted once instead of once per window.
  const BoundingBox target(box);
  BoundingBox bbox;
  Size scale;
  int sc=0; // index of the current accepted scale
  for (int s=0; s<num_scales; s++){
    const int width = round(box.width*SCALES[s]);
    const int height = round(box.height*SCALES[s]);
    const int min_bb_side = min(height,width);
    // Skip scales whose window is smaller than min_win or larger than the image.
    if (min_bb_side < min_win || width > img.cols || height > img.rows)
      continue;
    scale.width = width;
    scale.height = height;
    scales.push_back(scale);
    // Clamp to 1: for a shorter side below 5 pixels the rounded step would be
    // 0 and the scan loops below would never terminate.
    const int step = max(1, static_cast<int>(round(SHIFT*min_bb_side)));
    for (int y=1; y<img.rows-height; y+=step){
      for (int x=1; x<img.cols-width; x+=step){
        bbox.x = x;
        bbox.y = y;
        bbox.width = width;
        bbox.height = height;
        bbox.overlap = bbOverlap(bbox, target);
        bbox.sidx = sc;
        grid.push_back(bbox);
      }
    }
    sc++;
  }
}
//此函數計算兩個bounding box 的重疊度
//重疊度定義為 兩個box的交集 與 它們的并集 的比
float TLD::bbOverlap(const BoundingBox& box1, const BoundingBox& box2){
//先判斷坐标,假如它們都沒有重疊的地方,就直接傳回0
if (box1.x > box2.x + box2.width) { return 0.0; }
if (box1.y > box2.y + box2.height) { return 0.0; }
if (box1.x + box1.width < box2.x) { return 0.0; }
if (box1.y + box1.height < box2.y) { return 0.0; }
float colInt = min(box1.x + box1.width, box2.x + box2.width) - max(box1.x, box2.x);
float rowInt = min(box1.y + box1.height, box2.y + box2.height) - max(box1.y, box2.y);
float intersection = colInt * rowInt;
float area1 = box1.width * box1.height;
float area2 = box2.width * box2.height;
return intersection / (area1 + area2 - intersection);
}
//此函數根據傳入的box1(目标邊界框),在整幀圖像中的全部視窗中尋找與該box1距離最小(即最相似,
//重疊度最大)的num_closest個視窗,然後把這些視窗 歸入good_boxes容器(隻是把網格數組的索引存入)
//同時,把重疊度小于0.2的,歸入 bad_boxes 容器
// Sorts the scanning windows into good and bad candidates using the overlap
// already stored in each grid entry.
//
// box1:        not read in this function; the stored grid[i].overlap values
//              are used instead, so they must be up to date before the call.
// num_closest: maximum number of indexes kept in good_boxes.
//
// Effects: best_box becomes the window with the largest overlap (if any is
// above 0); indexes with overlap > 0.6 are appended to good_boxes, indexes
// with overlap < bad_overlap to bad_boxes; good_boxes is then cut down to the
// num_closest best entries and the hull of good_boxes is recomputed.
void TLD::getOverlappingBoxes(const cv::Rect& box1,int num_closest){
  float highest = 0;
  const size_t total = grid.size();
  for (size_t k=0; k<total; ++k){
    const float ov = grid[k].overlap;
    if (ov > highest) {
      // New best-overlapping window so far.
      highest = ov;
      best_box = grid[k];
    }
    if (ov > 0.6){
      good_boxes.push_back(static_cast<int>(k));
      continue;
    }
    if (ov < bad_overlap){
      bad_boxes.push_back(static_cast<int>(k));
    }
  }
  // Keep only the num_closest best windows: nth_element moves the entries
  // with the largest overlap in front of position num_closest.
  if (good_boxes.size()>num_closest){
    std::nth_element(good_boxes.begin(), good_boxes.begin() + num_closest, good_boxes.end(), OComparator(grid));
    good_boxes.resize(num_closest);
  }
  // Refresh the bounding hull of the selected windows.
  getBBHull();
}
//此函數擷取good_boxes 的 Hull殼,也就是視窗(圖像)的邊框 bounding box
// Stores in bbhull the smallest axis-aligned rectangle that encloses every
// window referenced by good_boxes.
//
// When good_boxes is empty, bbhull is set to an empty rectangle (0,0,0,0).
// Without that guard the min/max seeds would leak through and produce
// x = y = INT_MAX with a negative width and height.
void TLD::getBBHull(){
  if (good_boxes.empty()){
    bbhull.x = 0;
    bbhull.y = 0;
    bbhull.width = 0;
    bbhull.height = 0;
    return;
  }
  // Seeds: left/top start at the largest int so any window lowers them;
  // right/bottom start at 0 so any window raises them.
  int x1=INT_MAX, x2=0;
  int y1=INT_MAX, y2=0;
  for (size_t i=0;i<good_boxes.size();i++){
    const int idx = good_boxes[i];
    x1=min(grid[idx].x,x1);                     // leftmost edge
    y1=min(grid[idx].y,y1);                     // topmost edge
    x2=max(grid[idx].x + grid[idx].width,x2);   // rightmost edge
    y2=max(grid[idx].y + grid[idx].height,y2);  // bottommost edge
  }
  bbhull.x = x1;
  bbhull.y = y1;
  bbhull.width = x2-x1;
  bbhull.height = y2 -y1;
}
//如果兩個box的重疊度小于0.5,傳回false,否則傳回true
bool bbcomp(const BoundingBox& b1,const BoundingBox& b2){
TLD t;
if (t.bbOverlap(b1,b2)<0.5)
return false;
else
return true;
}
int TLD::clusterBB(const vector<BoundingBox>& dbb,vector<int>& indexes){
//FIXME: Conditional jump or move depends on uninitialised value(s)
const int c = dbb.size();
//1. Build proximity matrix
Mat D(c,c,CV_32F);
float d;
for (int i=0;i<c;i++){
for (int j=i+1;j<c;j++){
d = 1-bbOverlap(dbb[i],dbb[j]);
D.at<float>(i,j) = d;
D.at<float>(j,i) = d;
}
}
//2. Initialize disjoint clustering
float L[c-1]; //Level
int nodes[c-1][2];
int belongs[c];
int m=c;
for (int i=0;i<c;i++){
belongs[i]=i;
}
for (int it=0;it<c-1;it++){
//3. Find nearest neighbor
float min_d = 1;
int node_a, node_b;
for (int i=0;i<D.rows;i++){
for (int j=i+1;j<D.cols;j++){
if (D.at<float>(i,j)<min_d && belongs[i]!=belongs[j]){
min_d = D.at<float>(i,j);
node_a = i;
node_b = j;
}
}
}
if (min_d>0.5){
int max_idx =0;
bool visited;
for (int j=0;j<c;j++){
visited = false;
for(int i=0;i<2*c-1;i++){
if (belongs[j]==i){
indexes[j]=max_idx;
visited = true;
}
}
if (visited)
max_idx++;
}
return max_idx;
}
//4. Merge clusters and assign level
L[m]=min_d;
nodes[it][0] = belongs[node_a];
nodes[it][1] = belongs[node_b];
for (int k=0;k<c;k++){
if (belongs[k]==belongs[node_a] || belongs[k]==belongs[node_b])
belongs[k]=m;
}
m++;
}
return 1;
}
//對檢測器檢測到的目标bounding box進行聚類
//聚類(Cluster)分析是由若幹模式(Pattern)組成的,通常,模式是一個度量(Measurement)的向量,或者是多元空間中的
//一個點。聚類分析以相似性為基礎,在一個聚類中的模式之間比不在同一聚類中的模式之間具有更多的相似性。
// Clusters the detector's boxes and reduces every cluster to one
// representative box with one confidence.
//
// dbb:   detected boxes.
// dconf: confidence of each detected box (same order as dbb).
// cbb:   output, one averaged box per cluster.
// cconf: output, mean confidence per cluster.
//
// Boxes whose overlap is at least 0.5 are treated as the same cluster. The
// representative box is the rounded mean of position and size of its members.
void TLD::clusterConf(const vector<BoundingBox>& dbb,const vector<float>& dconf,vector<BoundingBox>& cbb,vector<float>& cconf){
  const int numbb = dbb.size();
  vector<int> T;               // cluster label of every detected box
  const float space_thr = 0.5; // distance (1 - overlap) above which boxes split
  int c=1;                     // number of clusters
  switch (numbb){
  case 0:
    // Nothing to cluster; skip the clustering helper for empty input.
    c=0;
    break;
  case 1:
    // A single detection is its own cluster.
    cbb=vector<BoundingBox>(1,dbb[0]);
    cconf=vector<float>(1,dconf[0]);
    return;
  case 2:
    T =vector<int>(2,0);
    // Two detections: separate clusters only if they are far apart.
    if (1 - bbOverlap(dbb[0],dbb[1]) > space_thr){
      T[1]=1;
      c=2;
    }
    break;
  default:
    T = vector<int>(numbb, 0);
    // Three-argument partition(vector, labels, predicate) — presumably
    // cv::partition, not std::partition: it groups elements that are linked
    // by the predicate (bbcomp: overlap >= 0.5) into classes, writes the
    // class label of every box into T and returns the number of classes.
    c = partition(dbb, T, (*bbcomp));
    //c = clusterBB(dbb,T);
    break;
  }
  cconf=vector<float>(c);
  cbb=vector<BoundingBox>(c);
  printf("Cluster indexes: ");
  BoundingBox bx;
  for (int i=0;i<c;i++){ // for every cluster
    float cnf=0;
    int N=0,mx=0,my=0,mw=0,mh=0;
    for (size_t j=0;j<T.size();j++){ // for every detected box
      if (T[j]==i){ // accumulate the members of cluster i
        printf("%d ",i);
        cnf=cnf+dconf[j];
        mx=mx+dbb[j].x;
        my=my+dbb[j].y;
        mw=mw+dbb[j].width;
        mh=mh+dbb[j].height;
        N++;
      }
    }
    if (N>0){
      cconf[i]=cnf/N;
      // Divide in floating point so cvRound really rounds the mean; integer
      // division would truncate before rounding.
      bx.x=cvRound(static_cast<double>(mx)/N);
      bx.y=cvRound(static_cast<double>(my)/N);
      bx.width=cvRound(static_cast<double>(mw)/N);
      bx.height=cvRound(static_cast<double>(mh)/N);
      cbb[i]=bx;
    }
  }
  printf("\n");
}