# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, sys
import os.path as osp
import PIL
from utils.cython_bbox import bbox_overlaps
import numpy as np
import scipy.sparse
from model import config as cfg
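'''
The imdb class is the parent class of all datasets and holds the properties that every dataset shares,
e.g. the dataset name (name), the list of class names (classes), the list of image file names (_image_index), the set of ROIs (roidb), and config.
'''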
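'''
roidb is a list of dicts; roidb[img_index] holds the ROI information of the image at that index. Taking roidb[img_index] as an example:
boxes: box positions, a box_num x 4 np array
gt_overlaps: score of every box for each class, a box_num x class_num matrix
gt_classes: true class of every box, a list of length box_num
flipped: whether the image is flipped
max_overlaps: for each box, its maximum score over all classes, length box_num
max_classes: for each box, the class with the highest score, length box_num
'''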
class imdb(object):
    """Image database.

    Base class for datasets. It stores a dataset name, a list of class
    names, an image index, a lazily built roidb and a free-form ``config``
    dict. ``image_path_at``, ``default_roidb`` and ``evaluate_detections``
    raise ``NotImplementedError`` here and are meant to be overridden.

    A roidb is a list of dicts, one per image. The methods of this class
    read or write the following keys:

        boxes        -- (num_boxes, 4) array; columns 0 and 2 are treated as
                        x-min / x-max (columns 1 and 3 presumably y-min /
                        y-max)
        gt_overlaps  -- sparse (num_boxes, num_classes) overlap matrix
        gt_classes   -- (num_boxes,) array of class indices
        flipped      -- True for horizontally flipped entries
        seg_areas    -- (num_boxes,) array compared against area ranges
    """

    def __init__(self, name, classes=None):
        """Create an empty image database.

        Args:
            name: dataset name.
            classes: optional list of class names; any falsy value results
                in an empty list.
        """
        self._name = name  # dataset name
        # NOTE(review): nothing in this class reads ``_num_classes``; the
        # ``num_classes`` property is derived from ``_classes`` instead.
        self._num_classes = 22
        self._classes = classes if classes else []  # list of class names
        # One identifier per image; kept index-aligned with the roidb.
        self._image_index = []
        self._obj_proposer = 'gt'
        # Built on first access of the ``roidb`` property.
        self._roidb = None
        # Callable that builds the roidb when it is first needed.
        self._roidb_handler = self.default_roidb
        # Use this dict for storing dataset specific config options
        self.config = {}

    @property
    def name(self):
        """Dataset name."""
        return self._name

    @property
    def num_classes(self):
        """Number of entries in the class-name list."""
        return len(self._classes)

    @property
    def classes(self):
        """List of class names."""
        return self._classes

    @property
    def image_index(self):
        """List of image identifiers."""
        return self._image_index

    @property
    def roidb_handler(self):
        """Callable used to build the roidb on first access."""
        return self._roidb_handler

    @roidb_handler.setter
    def roidb_handler(self, val):
        self._roidb_handler = val

    def set_proposal_method(self, method):
        """Select ``self.<method>_roidb`` as the roidb handler.

        Raises:
            AttributeError: if no such attribute exists on this object.
        """
        # getattr performs the same attribute lookup the former eval() did,
        # without evaluating arbitrary expressions.
        self.roidb_handler = getattr(self, method + '_roidb')

    @property
    def roidb(self):
        """Return the roidb, building it through the handler on first use."""
        # A roidb is a list of dictionaries, each with the following keys:
        #   boxes
        #   gt_overlaps
        #   gt_classes
        #   flipped
        if self._roidb is not None:
            return self._roidb
        self._roidb = self.roidb_handler()
        return self._roidb

    @property
    def cache_path(self):
        """Absolute path of the ``cache`` directory, created if missing."""
        # NOTE(review): relies on the imported ``cfg`` object exposing
        # ``DATA_DIR`` -- confirm against the ``model.config`` module.
        cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
        return cache_path

    @property
    def num_images(self):
        """Number of entries in the image index."""
        return len(self.image_index)

    def image_path_at(self, i):
        """Return the path of image ``i``; must be overridden."""
        raise NotImplementedError

    def default_roidb(self):
        """Build the default roidb; must be overridden."""
        raise NotImplementedError

    def evaluate_detections(self, all_boxes, output_dir=None):
        """
        all_boxes is a list of length number-of-classes.
        Each list element is a list of length number-of-images.
        Each of those list elements is either an empty list []
        or a numpy array of detection.

        all_boxes[class][image] = [] or np.array of shape #dets x 5
        """
        raise NotImplementedError

    def _get_widths(self):
        """Return ``size[0]`` (the width) of every image, in index order."""
        # Imported here because a bare ``import PIL`` does not load the
        # ``PIL.Image`` submodule.
        from PIL import Image

        widths = []
        for i in range(self.num_images):
            # The context manager closes the underlying file as soon as the
            # size has been read instead of leaving it to garbage collection.
            with Image.open(self.image_path_at(i)) as img:
                widths.append(img.size[0])
        return widths

    def append_flipped_images(self):
        """Append a horizontally flipped copy of every roidb entry.

        Used for data augmentation. The roidb grows from N to 2N entries and
        the image index is doubled so both stay index-aligned.
        """
        num_images = self.num_images
        widths = self._get_widths()
        for i in range(num_images):
            src = self.roidb[i]
            # ``copy()`` yields an independent array, so the original boxes
            # are left untouched.
            boxes = src['boxes'].copy()
            oldx1 = boxes[:, 0].copy()
            oldx2 = boxes[:, 2].copy()
            # Mirror the x coordinates; the old x-max becomes the new x-min.
            # Example with width 10: [1, 2, 4, 5] -> [5, 2, 8, 5].
            boxes[:, 0] = widths[i] - oldx2 - 1
            boxes[:, 2] = widths[i] - oldx1 - 1
            assert (boxes[:, 2] >= boxes[:, 0]).all()
            # The remaining fields do not depend on orientation and are
            # shared by reference with the source entry.
            entry = {'boxes': boxes,
                     'gt_overlaps': src['gt_overlaps'],
                     'gt_classes': src['gt_classes'],
                     'flipped': True}
            # evaluate_recall() reads 'seg_areas' from every entry, so carry
            # it over when the source entry provides it.
            if 'seg_areas' in src:
                entry['seg_areas'] = src['seg_areas']
            self.roidb.append(entry)
        self._image_index = self._image_index * 2

    def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                        area='all', limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional per-image sequence of box arrays; when
                None, the roidb boxes whose ``gt_classes`` is 0 are used.
            thresholds: optional overlap thresholds; defaults to
                0.5, 0.55, ..., 0.95.
            area: name of the ground-truth area range to evaluate; one of
                the keys of ``areas`` below.
            limit: optional maximum number of candidate boxes per image.

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
                 '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
        area_ranges = [[0 ** 2, 1e5 ** 2],  # all
                       [0 ** 2, 32 ** 2],  # small
                       [32 ** 2, 96 ** 2],  # medium
                       [96 ** 2, 1e5 ** 2],  # large
                       [96 ** 2, 128 ** 2],  # 96-128
                       [128 ** 2, 256 ** 2],  # 128-256
                       [256 ** 2, 512 ** 2],  # 256-512
                       [512 ** 2, 1e5 ** 2],  # 512-inf
                       ]
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            entry = self.roidb[i]
            # Checking for max_overlaps == 1 avoids including crowd
            # annotations (...pretty hacking :/)
            max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((entry['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_areas = entry['seg_areas'][gt_inds]
            # Keep only the ground-truth boxes inside the requested range.
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use
                # the non-ground-truth boxes from this roidb
                non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
                boxes = entry['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # Overlap of every candidate box (rows) with every gt box
            # (columns). ``np.float`` no longer exists in current NumPy;
            # ``np.float64`` is the dtype it used to alias.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # Greedy one-to-one matching. Each round consumes one candidate
            # box and one gt box, so at most min(#boxes, #gt) rounds are
            # possible; gt boxes left unmatched keep an overlap of 0.
            for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box; (box_ind, gt_ind)
                # is the position of the global maximum of ``overlaps``
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
                'gt_overlaps': gt_overlaps}

    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build a roidb from per-image box arrays.

        Args:
            box_list: one box array per image; its length must equal
                ``num_images``.
            gt_roidb: ground-truth roidb aligned with ``box_list``, or None.

        Returns:
            A list with one dict per image holding 'boxes', 'gt_classes'
            (all zeros), 'gt_overlaps' (sparse), 'flipped' (False) and
            'seg_areas' (all zeros).
        """
        assert len(box_list) == self.num_images, \
            'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            # Always num_boxes x num_classes.
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # num_boxes x num_gt_boxes overlap matrix. ``np.float`` no
                # longer exists in current NumPy; use ``np.float64``.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                # Boxes that overlap at least one gt box.
                matched = np.where(maxes > 0)[0]
                # Store each matched box's best overlap in the column of the
                # class of the gt box it overlaps most.
                overlaps[matched, gt_classes[argmaxes[matched]]] = \
                    maxes[matched]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes': boxes,
                'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps': overlaps,
                'flipped': False,
                'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb

    @staticmethod
    def merge_roidbs(a, b):
        """Merge roidb ``b`` into roidb ``a`` image by image.

        The entries of ``a`` are modified in place and ``a`` is returned.
        Both roidbs must have the same length.
        """
        assert len(a) == len(b)
        for i in range(len(a)):
            a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
            a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
                                            b[i]['gt_classes']))
            a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
                                                       b[i]['gt_overlaps']])
            a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
                                           b[i]['seg_areas']))
        return a

    def competition_mode(self, on):
        """Turn competition mode on or off."""
        pass