RefineDet: (1) Training Script Analysis

Related posts:

  1. Notes on the paper "RefineDet: Single-Shot Refinement Neural Network for Object Detection"
  2. RefineDet: (2) Detection Network Analysis
  3. RefineDet: (3) C++ Test Code

1. Overview

The principles behind RefineDet were introduced in a previous post (see related link 1 above).

This post analyzes the training script, using VGG16 as the base network. The parameters of the script are explained below.

'''
File: RefineDet-master\examples\refinedet\VGG16_VOC2007_512.py
'''

# Add extra layers on top of a "base" network (e.g. VGGNet or ResNet).
# AddExtraLayers is one of the key functions in building the network: it
# implements the transfer connection block (TCB) from the paper, i.e. an
# FPN-like feature fusion.
def AddExtraLayers(net, use_batchnorm=True, arm_source_layers=[], normalizations=[], lr_mult=1):
    use_relu = True

    # Add the additional convolution layers described in the paper.
    # 512/32: 16 x 16
    from_layer = net.keys()[-1]

    # 512/64: 8 x 8
    out_layer = "conv6_1"
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 1, 0, 1, lr_mult=lr_mult)

    from_layer = out_layer
    out_layer = "conv6_2"
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 512, 3, 1, 2, lr_mult=lr_mult)
	
    # Build the FPN-like structure on top of the specified ARM feature maps
    arm_source_layers.reverse()  # source layers of the ARM feature maps
    normalizations.reverse()
    num_p = 6
    for index, layer in enumerate(arm_source_layers):
        out_layer = layer
        # L2-normalize conv4_3 and conv5_3 with initial scales 10 and 8.
        # The paper notes that these two layers have a different feature scale
        # from later layers, so they are normalized and the scale is learned.
        if normalizations:
            if normalizations[index] != -1:
                norm_name = "{}_norm".format(layer)
                net[norm_name] = L.Normalize(net[layer], scale_filler=dict(type="constant", value=normalizations[index]),
                    across_spatial=False, channel_shared=False)
                out_layer = norm_name
                arm_source_layers[index] = norm_name
        from_layer = out_layer
        out_layer = "TL{}_{}".format(num_p, 1)
        ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 1, 1, lr_mult=lr_mult)

        if num_p == 6:  # the deepest TCB, which has no higher-level input
            from_layer = out_layer
            out_layer = "TL{}_{}".format(num_p, 2)
            ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 1, 1, lr_mult=lr_mult)

            from_layer = out_layer
            out_layer = "P{}".format(num_p)
            ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 1, 1, lr_mult=lr_mult)
        else:  # build the remaining TCBs
            from_layer = out_layer
            out_layer = "TL{}_{}".format(num_p, 2)
            ConvBNLayer(net, from_layer, out_layer, use_batchnorm, False, 256, 3, 1, 1, lr_mult=lr_mult)

            from_layer = "P{}".format(num_p+1)
            out_layer = "P{}-up".format(num_p+1)
            DeconvBNLayer(net, from_layer, out_layer, use_batchnorm, False, 256, 2, 0, 2, lr_mult=lr_mult)

            from_layer = ["TL{}_{}".format(num_p, 2), "P{}-up".format(num_p+1)]
            out_layer = "Elt{}".format(num_p)
            EltwiseLayer(net, from_layer, out_layer)
            relu_name = '{}_relu'.format(out_layer)
            net[relu_name] = L.ReLU(net[out_layer], in_place=True)
            out_layer = relu_name

            from_layer = out_layer
            out_layer = "P{}".format(num_p)
            ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 1, 1, lr_mult=lr_mult)

        num_p = num_p - 1

    return net
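
# A quick sanity check of the TCB chain built above (an illustrative sketch,
# not part of the original script): every TL*/P* layer is a 3x3 stride-1 pad-1
# conv, so spatial size is preserved, and the 2x2 stride-2 deconv doubles the
# upper P map so the element-wise sum lines up. With a 512x512 input:
arm_map_sizes = {'conv6_2': 8, 'fc7': 16, 'conv5_3': 32, 'conv4_3': 64}
sizes, upper = {}, None
for arm, p in zip(['conv6_2', 'fc7', 'conv5_3', 'conv4_3'],
                  ['P6', 'P5', 'P4', 'P3']):
    if upper is not None:
        assert 2 * upper == arm_map_sizes[arm]  # deconv output matches the TL map
    sizes[p] = upper = arm_map_sizes[arm]
# sizes == {'P6': 8, 'P5': 16, 'P4': 32, 'P3': 64}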


### Modify the following parameters accordingly ###
# The directory which contains the caffe code.
# We assume you are running the script at the CAFFE_ROOT.
caffe_root = os.getcwd()

# Set true if you want to start training right after generating all files.
run_soon = True
# Set true if you want to load from most recently saved snapshot.
# Otherwise, we will load from the pretrain_model defined below.
resume_training = True  # resume from the most recent snapshot if one exists
# If true, Remove old model files.
remove_old_models = False  # whether to delete old model files

# The database file for training data. Created by data/VOC0712/create_data.sh
train_data = "examples/VOC0712/VOC0712_trainval_lmdb"
# The database file for testing data. Created by data/VOC0712/create_data.sh
test_data = "examples/VOC0712/VOC0712_test_lmdb"
# Specify the batch sampler.
resize_width = 512   # width images are resized to for training
resize_height = 512  # height images are resized to for training
resize = "{}x{}".format(resize_width, resize_height)
# The batch_sampler list is used during data loading and augmentation
batch_sampler = [
        {
                'sampler': {
                        },
                'max_trials': 1,
                'max_sample': 1,
        },
        {
                'sampler': {
                        'min_scale': 0.3,
                        'max_scale': 1.0,
                        'min_aspect_ratio': 0.5,
                        'max_aspect_ratio': 2.0,
                        },
                'sample_constraint': {
                        'min_jaccard_overlap': 0.1,
                        },
                'max_trials': 50,
                'max_sample': 1,
        },
        {
                'sampler': {
                        'min_scale': 0.3,
                        'max_scale': 1.0,
                        'min_aspect_ratio': 0.5,
                        'max_aspect_ratio': 2.0,
                        },
                'sample_constraint': {
                        'min_jaccard_overlap': 0.3,
                        },
                'max_trials': 50,
                'max_sample': 1,
        },
        {
                'sampler': {
                        'min_scale': 0.3,
                        'max_scale': 1.0,
                        'min_aspect_ratio': 0.5,
                        'max_aspect_ratio': 2.0,
                        },
                'sample_constraint': {
                        'min_jaccard_overlap': 0.5,
                        },
                'max_trials': 50,
                'max_sample': 1,
        },
        {
                'sampler': {
                        'min_scale': 0.3,
                        'max_scale': 1.0,
                        'min_aspect_ratio': 0.5,
                        'max_aspect_ratio': 2.0,
                        },
                'sample_constraint': {
                        'min_jaccard_overlap': 0.7,
                        },
                'max_trials': 50,
                'max_sample': 1,
        },
        {
                'sampler': {
                        'min_scale': 0.3,
                        'max_scale': 1.0,
                        'min_aspect_ratio': 0.5,
                        'max_aspect_ratio': 2.0,
                        },
                'sample_constraint': {
                        'min_jaccard_overlap': 0.9,
                        },
                'max_trials': 50,
                'max_sample': 1,
        },
        {
                'sampler': {
                        'min_scale': 0.3,
                        'max_scale': 1.0,
                        'min_aspect_ratio': 0.5,
                        'max_aspect_ratio': 2.0,
                        },
                'sample_constraint': {
                        'max_jaccard_overlap': 1.0,
                        },
                'max_trials': 50,
                'max_sample': 1,
        },
        ]
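
# Each sampler above crops a random patch and keeps it only if the patch
# overlaps some ground-truth box by at least min_jaccard_overlap, retrying up
# to max_trials times. A minimal sketch of that overlap test (illustration
# only), assuming normalized [xmin, ymin, xmax, ymax] boxes:
def jaccard_overlap(a, b):
    iw = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
    ih = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = iw * ih
    union = ((a[2] - a[0]) * (a[3] - a[1]) +
             (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / union if union > 0 else 0.0
# e.g. a crop passes the {'min_jaccard_overlap': 0.5} sampler when
# jaccard_overlap(crop, gt) >= 0.5 for some ground-truth box gt.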
# train_transform_param defines the preprocessing / data augmentation applied to training data
train_transform_param = {
        'mirror': True,  # random horizontal mirroring
        'mean_value': [104, 117, 123],  # per-channel image mean
        'resize_param': {  # resize settings
                'prob': 1,
                'resize_mode': P.Resize.WARP,
                'height': resize_height,
                'width': resize_width,
                'interp_mode': [
                        P.Resize.LINEAR,
                        P.Resize.AREA,
                        P.Resize.NEAREST,
                        P.Resize.CUBIC,
                        P.Resize.LANCZOS4,
                        ],
                },
        'distort_param': {  # photometric distortions: brightness, contrast, hue, saturation
                'brightness_prob': 0.5,
                'brightness_delta': 32,
                'contrast_prob': 0.5,
                'contrast_lower': 0.5,
                'contrast_upper': 1.5,
                'hue_prob': 0.5,
                'hue_delta': 18,
                'saturation_prob': 0.5,
                'saturation_lower': 0.5,
                'saturation_upper': 1.5,
                'random_order_prob': 0.0,
                },
        'expand_param': {
                'prob': 0.5,
                'max_expand_ratio': 4.0,
                },
        'emit_constraint': {
            'emit_type': caffe_pb2.EmitConstraint.CENTER,
            }
        }
# transform parameters used at test time
test_transform_param = {
        'mean_value': [104, 117, 123],
        'resize_param': {
                'prob': 1,
                'resize_mode': P.Resize.WARP,
                'height': resize_height,
                'width': resize_width,
                'interp_mode': [P.Resize.LINEAR],
                },
        }

# If true, use batch norm for all newly added layers.
# Currently only the non batch norm version has been tested.
# batch norm setting for the newly added layers; default is False
use_batchnorm = False
lr_mult = 1
# Use different initial learning rate.
# initial learning rate
if use_batchnorm:
    base_lr = 0.0004
else:
    # A learning rate for batch_size = 1, num_gpus = 1.
    base_lr = 0.00004

# Modify the job name if you want.
job_name = "refinedet_vgg16_{}".format(resize)
# The name of the model. Modify it if you want.
model_name = "VOC0712_{}".format(job_name)

# Directory which stores the model .prototxt files.
save_dir = "models/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the snapshots of models.
snapshot_dir = "models/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the job script and log file.
job_dir = "jobs/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the detection results.
output_result_dir = "{}/data/RefineDet/pascal/VOCdevkit/results/VOC2007/{}/Main".format(os.environ['HOME'], job_name)

# model definition files.
train_net_file = "{}/train.prototxt".format(save_dir)
test_net_file = "{}/test.prototxt".format(save_dir)
deploy_net_file = "{}/deploy.prototxt".format(save_dir)
solver_file = "{}/solver.prototxt".format(save_dir)
# snapshot prefix.
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
# job script path.
job_file = "{}/{}.sh".format(job_dir, model_name)

# Stores the test image names and sizes. Created by data/VOC0712/create_list.sh
name_size_file = "data/VOC0712/test_name_size.txt"
# The pretrained model. We use the fully convolutional reduced (atrous) VGGNet.
pretrain_model = "models/VGGNet/VGG_ILSVRC_16_layers_fc_reduced.caffemodel"
# Stores LabelMapItem, i.e. the class label map.
label_map_file = "data/VOC0712/labelmap_voc.prototxt"

# MultiBoxLoss parameters.
num_classes = 21  # number of detection classes: 20 object classes + background
share_location = True
background_label_id = 0
train_on_diff_gt = True
normalization_mode = P.Loss.VALID
code_type = P.PriorBox.CENTER_SIZE
ignore_cross_boundary_bbox = False
mining_type = P.MultiBoxLoss.MAX_NEGATIVE  # hard negative mining method: MAX_NEGATIVE or OHEM
neg_pos_ratio = 3.  # ratio of mined negatives to positives
loc_weight = (neg_pos_ratio + 1.) / 4.  # weight of the localization loss
# 'loc_loss_type' is the box-regression loss; 'conf_loss_type' is the classification loss
multibox_loss_param = {
    'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
    'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
    'loc_weight': loc_weight,
    'num_classes': num_classes,
    'share_location': share_location,
    'match_type': P.MultiBoxLoss.PER_PREDICTION,
    'overlap_threshold': 0.5,
    'use_prior_for_matching': True,
    'background_label_id': background_label_id,
    'use_difficult_gt': train_on_diff_gt,
    'mining_type': mining_type,
    'neg_pos_ratio': neg_pos_ratio,
    'neg_overlap': 0.5,
    'code_type': code_type,
    'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    'objectness_score': 0.01,
    }
loss_param = {
    'normalization': normalization_mode,
    }
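
# A sketch of what MAX_NEGATIVE mining does inside MultiBoxLoss (illustration
# only; the real logic lives in multibox_loss_layer.cpp): with
# neg_pos_ratio = 3, the negatives with the highest classification loss are
# kept, at most 3 * num_positives per image.
import numpy as np

def mine_max_negatives(conf_loss, is_positive, neg_pos_ratio=3.0):
    num_pos = int(is_positive.sum())
    neg_idx = np.flatnonzero(~is_positive)
    hardest_first = neg_idx[np.argsort(-conf_loss[neg_idx])]
    return hardest_first[:int(neg_pos_ratio * num_pos)]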

# parameters for generating priors.
# minimum dimension of input image
# min_dim = 512
# conv4_3 ==> 64 x 64
# conv5_3 ==> 32 x 32
# fc7 ==> 16 x 16
# conv6_2 ==> 8 x 8
# ARM feature layers fed into the TCBs
arm_source_layers = ['conv4_3', 'conv5_3', 'fc7', 'conv6_2']
# fused feature layers used by the ODM
odm_source_layers = ['P3', 'P4', 'P5', 'P6']
# minimum anchor sizes: 4x the stride of each source layer
min_sizes = [32, 64, 128, 256]
max_sizes = [[], [], [], []]
# stride of each source layer
steps = [8, 16, 32, 64]
# anchor aspect ratios; with flip=True, prior_box_layer generates anchors with
# the three ratios [0.5, 1.0, 2.0] from these values
aspect_ratios = [[2], [2], [2], [2]]
# L2 normalize conv4_3 and conv5_3; initial scales (-1 means no normalization)
normalizations = [10, 8, -1, -1]
# variance used to encode/decode prior bboxes.
if code_type == P.PriorBox.CENTER_SIZE:
  prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
  prior_variance = [0.1]
flip = True  # add the flipped aspect ratios (e.g. 1/2 for 2)
clip = False # whether to clip anchors to the image boundary
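
# Worked example of the anchors one cell generates under the settings above
# (an illustrative sketch): min_size = 32, aspect_ratios = [2] and flip = True
# give the three ratios [1, 2, 1/2] mentioned in the comment above, with
# width = min_size * sqrt(ar) and height = min_size / sqrt(ar):
import math
example_anchors = [(32.0 * math.sqrt(r), 32.0 / math.sqrt(r)) for r in (1.0, 2.0, 0.5)]
# [(32.0, 32.0), (45.25, 22.63), (22.63, 45.25)]  -- (width, height) in pixels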

# Solver parameters.
# Defining which GPUs to use.
# device ids to use when training on GPU
gpus = "0,1,2,3"
gpulist = gpus.split(",")
num_gpus = len(gpulist)

# Divide the mini-batch to different GPUs.
# the per-GPU batch size is derived from these below
batch_size = 32
accum_batch_size = 32
iter_size = accum_batch_size / batch_size
solver_mode = P.Solver.CPU
device_id = 0
batch_size_per_device = batch_size
if num_gpus > 0:
  batch_size_per_device = int(math.ceil(float(batch_size) / num_gpus))
  iter_size = int(math.ceil(float(accum_batch_size) / (batch_size_per_device * num_gpus)))
  solver_mode = P.Solver.GPU
  device_id = int(gpulist[0])
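
# Worked example with the defaults above (batch_size = 32, gpus = "0,1,2,3"):
#   batch_size_per_device = ceil(32 / 4) = 8 images per GPU
#   iter_size = ceil(32 / (8 * 4)) = 1, i.e. no gradient accumulation needed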

# Scale base_lr according to the loss normalization mode. The default is
# P.Loss.VALID, which yields an effective base_lr of 0.001.
if normalization_mode == P.Loss.NONE:
  base_lr /= batch_size_per_device
elif normalization_mode == P.Loss.VALID:
  base_lr *= 25. / loc_weight
elif normalization_mode == P.Loss.FULL:
  # Roughly there are 2000 prior bboxes per image.
  # TODO(weiliu89): Estimate the exact # of priors.
  base_lr *= 2000.
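
# Worked example of the default path (P.Loss.VALID, neg_pos_ratio = 3):
#   loc_weight = (3 + 1) / 4 = 1.0
#   base_lr    = 0.00004 * 25 / 1.0 = 0.001
# which is the 0.001 effective learning rate mentioned above.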

# Evaluate on whole test set.
# test-set parameters; note that this script trains without an in-training
# test phase (the test fields in solver_param below are commented out)
num_test_image = 4952
test_batch_size = 1
test_iter = num_test_image / test_batch_size

# solver configuration
solver_param = {
    # Train parameters
    'base_lr': base_lr,
    'weight_decay': 0.0005,
    'lr_policy': "multistep",
    'stepvalue': [80000, 100000, 120000],
    'gamma': 0.1,
    'momentum': 0.9,
    'iter_size': iter_size,
    'max_iter': 120000,
    'snapshot': 5000,
    'display': 10,
    'average_loss': 10,
    'type': "SGD",
    'solver_mode': solver_mode,
    'device_id': device_id,
    'debug_info': False,
    'snapshot_after_train': True,
    # Test parameters
    # 'test_iter': [test_iter],
    # 'test_interval': 5000,
    # 'eval_type': "detection",
    # 'ap_version': "11point",
    # 'test_initialization': False,
    }
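
# Under lr_policy "multistep" the rate is multiplied by gamma at each
# stepvalue. A sketch of the resulting schedule, assuming the effective
# base_lr of 0.001 derived above:
def multistep_lr(it, base_lr=0.001, gamma=0.1, steps=(80000, 100000, 120000)):
    return base_lr * gamma ** sum(it >= s for s in steps)
# multistep_lr(0) == 0.001, multistep_lr(90000) ~= 0.0001,
# multistep_lr(110000) ~= 1e-05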

# parameters for generating detection output.
# configuration of the detection output
det_out_param = {
    'num_classes': num_classes,  # number of classes
    'share_location': share_location,
    'background_label_id': background_label_id,  # id of the background class
    'nms_param': {'nms_threshold': 0.45, 'top_k': 1000},  # NMS threshold and number of boxes kept after NMS
    'keep_top_k': 500,  # number of boxes in the final output
    'confidence_threshold': 0.01,  # confidence threshold applied during NMS
    'code_type': code_type,
    'objectness_score': 0.01,  # ARM objectness threshold for filtering anchors
    }
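
# A sketch of the post-processing these parameters drive (illustration only;
# the real implementation is the DetectionOutput layer): boxes below
# confidence_threshold are dropped, greedy NMS at IoU 0.45 keeps at most top_k
# boxes, and the final output is truncated to keep_top_k.
import numpy as np

def greedy_nms(boxes, scores, iou_thr=0.45, top_k=1000):
    order = np.argsort(-scores)[:top_k]
    keep = []
    while order.size > 0:
        i, order = order[0], order[1:]
        keep.append(i)
        if order.size == 0:
            break
        x1 = np.maximum(boxes[i, 0], boxes[order, 0])
        y1 = np.maximum(boxes[i, 1], boxes[order, 1])
        x2 = np.minimum(boxes[i, 2], boxes[order, 2])
        y2 = np.minimum(boxes[i, 3], boxes[order, 3])
        inter = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_o = (boxes[order, 2] - boxes[order, 0]) * (boxes[order, 3] - boxes[order, 1])
        order = order[inter / (area_i + area_o - inter) <= iou_thr]
    return keep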

# parameters for evaluating detection results.
# parameters used when evaluating detection results
det_eval_param = {
    'num_classes': num_classes,
    'background_label_id': background_label_id,
    'overlap_threshold': 0.5,
    'evaluate_difficult_gt': False,
    'name_size_file': name_size_file,
    }

### Hopefully you don't need to change the following ###
# Check files and create directories before training starts.
check_if_exist(train_data)
check_if_exist(test_data)
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)

# Create train net.
# Initialize a network via caffe.NetSpec()
net = caffe.NetSpec()
# CreateAnnotatedDataLayer reads the data; it is defined in
# ~RefineDet/python/caffe/model_libs.py and is identical to the SSD version.
net.data, net.label = CreateAnnotatedDataLayer(train_data, batch_size=batch_size_per_device,
        train=True, output_label=True, label_map_file=label_map_file,
        transform_param=train_transform_param, batch_sampler=batch_sampler)

# Build the VGG backbone
VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=False, dropout=False)

# AddExtraLayers adds two extra convolution layers on top of the VGG network,
# then applies the transfer connection block of Figure 1 in the paper to four
# layers. It takes the conv4_3, conv5_3, fc7 and conv6_2 outputs of VGG; with a
# 320x320 input their feature maps are 40x40, 20x20, 10x10 and 5x5.
# These four layers are the four gray boxes in the Anchor Refinement Module of
# Figure 1. From them the Transfer Connection Blocks produce P6, P5, P4 and P3,
# the four blue boxes in the Object Detection Module of Figure 1.
# That is everything AddExtraLayers implements.
AddExtraLayers(net, use_batchnorm, arm_source_layers, normalizations, lr_mult=lr_mult)
# AddExtraLayers reversed arm_source_layers and normalizations in place, so reverse them back here.
arm_source_layers.reverse()
normalizations.reverse()

# CreateRefineDetHead generates the classification and regression layers and is
# one of the most important functions; the returned mbox_layers holds the
# complete network outputs. It is defined in
# ~RefineDet/python/caffe/model_libs.py and was adapted from SSD's
# CreateMultiBoxHead function.
# It takes two important inputs: from_layers=arm_source_layers and
# from_layers2=odm_source_layers. The former is the set of four gray boxes in
# Figure 1 (ARM = Anchor Refinement Module); the latter is the set of four blue
# boxes in Figure 1 (ODM = Object Detection Module), initialized as
# ['P3', 'P4', 'P5', 'P6']. This is one of the major differences from SSD.
# The returned mbox_layers contains:
# mbox_layers[0] is "arm_loc": the ARM bbox regression output;
# mbox_layers[1] is "arm_conf": the ARM classification output (binary object/non-object);
# mbox_layers[2] is "arm_priorbox": the priorbox (anchor) information;
# mbox_layers[3] is "odm_loc": the ODM bbox regression output;
# mbox_layers[4] is "odm_conf": the ODM classification output (number of object classes + background).
mbox_layers = CreateRefineDetHead(net, data_layer='data', from_layers=arm_source_layers,
        use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
        aspect_ratios=aspect_ratios, steps=steps, normalizations=[],
        num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
        prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult, from_layers2=odm_source_layers)
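
# The head outputs can be unpacked by index in the order documented above
# (a readability aid, not part of the original script):
arm_loc, arm_conf, arm_priorbox, odm_loc, odm_conf = mbox_layers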

# With the network structure defined, the next step is the loss functions.
# First "arm_loss", computed with the L.MultiBoxLoss layer. The mbox_layers_arm
# list holds the bbox regression output ("arm_loc"), the classification output
# ("arm_conf"), the anchor (priorbox) information ("arm_priorbox") and the
# ground-truth information (net.label). multibox_loss_param_arm reuses the
# configuration above except for the number of classes ("num_classes = 2").
# Gradients are only propagated back through the first two inputs. This loss is
# essentially the same as an RPN: the classification loss is computed from
# mbox_layers[1] ("arm_conf") and net.label, and the regression loss from
# mbox_layers[0] ("arm_loc") and mbox_layers[2] ("arm_priorbox").
# MultiBoxLoss is a custom layer that first appeared in SSD and is slightly modified here; see
# https://github.com/sfzhang15/RefineDet/blob/master/src/caffe/layers/multibox_loss_layer.cpp.
# It is invoked exactly as in SSD (four input blobs); only the number of classes differs.
name = "arm_loss"
mbox_layers_arm = []
mbox_layers_arm.append(mbox_layers[0])  # "arm_loc": the ARM bbox regression output
mbox_layers_arm.append(mbox_layers[1])  # "arm_conf": the ARM classification output (binary object/non-object)
mbox_layers_arm.append(mbox_layers[2])  # "arm_priorbox": the priorbox (anchor) information
mbox_layers_arm.append(net.label)
multibox_loss_param_arm = multibox_loss_param.copy()
multibox_loss_param_arm['num_classes'] = 2
net[name] = L.MultiBoxLoss(*mbox_layers_arm, multibox_loss_param=multibox_loss_param_arm,
        loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
        propagate_down=[True, True, False, False])

# This block feeds net["arm_conf"] through a softmax to obtain the class
# probabilities net[flatten_name], i.e. net["arm_conf_flatten"].
# net["arm_conf"] is the binary classification output of the ARM, so this step
# is almost identical to how Faster R-CNN obtains its proposals.
# Reshape, softmax and flatten the ARM confidence output.
conf_name = "arm_conf"
reshape_name = "{}_reshape".format(conf_name)
net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, 2]))
softmax_name = "{}_softmax".format(conf_name)
net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
flatten_name = "{}_flatten".format(conf_name)
net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
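
# A numpy sketch (illustration only) of what the three layers above do to the
# ARM confidence blob, for N images and K anchors per image:
import numpy as np
N, K = 2, 5
arm_conf_blob = np.random.randn(N, K * 2)            # raw binary logits, (N, 2K)
pairs = arm_conf_blob.reshape(N, -1, 2)              # (N, K, 2): (bg, fg) per anchor
e = np.exp(pairs - pairs.max(axis=2, keepdims=True))
probs = e / e.sum(axis=2, keepdims=True)             # softmax along axis 2
arm_conf_flat = probs.reshape(N, -1)                 # (N, 2K), what "arm_conf_flatten" holds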

# Define "odm_loss", also computed with L.MultiBoxLoss.
# The mbox_layers_odm list holds the bbox regression output ("odm_loc"), the
# classification output ("odm_conf"), the anchor (priorbox) information
# ("arm_priorbox"), the ground-truth information (net.label), the ARM class
# probabilities (net["arm_conf_flatten"]) and the ARM bbox regression output
# (net["arm_loc"]). The last four inputs exist only to filter anchors and
# balance positive/negative samples, so gradients are only propagated back
# through the first two. The loss itself is similar to SSD's: the
# classification loss is computed from mbox_layers[4] ("odm_conf") and
# net.label, and the regression loss from mbox_layers[3] ("odm_loc") and
# mbox_layers[2] ("arm_priorbox"). Note that the input list has grown to six
# blobs; this is RefineDet's modification to MultiBoxLossLayer: two extra
# inputs. This part is very important and is one of the highlights of
# RefineDet. net["arm_conf_flatten"] takes part in hard negative mining,
# sorting and sampling the negatives (the paper states that anchors whose
# negative confidence, i.e. probability of being background, exceeds 0.99 do
# not participate in ODM training). The other input, net["arm_loc"], provides
# refined initial box coordinates, which helps the detection network produce
# more accurate results.
name = "odm_loss"
mbox_layers_odm = []
mbox_layers_odm.append(mbox_layers[3])  # "odm_loc": the ODM bbox regression output
mbox_layers_odm.append(mbox_layers[4])  # "odm_conf": the ODM classification output (object classes + background)
mbox_layers_odm.append(mbox_layers[2])  # "arm_priorbox": the priorbox (anchor) information
mbox_layers_odm.append(net.label)
mbox_layers_odm.append(net[flatten_name])
mbox_layers_odm.append(mbox_layers[0])  # "arm_loc": the ARM bbox regression output
net[name] = L.MultiBoxLoss(*mbox_layers_odm, multibox_loss_param=multibox_loss_param,
        loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
        propagate_down=[True, True, False, False, False, False])
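
# A numpy sketch of the negative-anchor filtering described above (illustration
# only; the real logic lives in multibox_loss_layer.cpp): an anchor whose ARM
# foreground probability falls below objectness_score = 0.01, i.e. whose
# background probability exceeds 0.99, is ignored during ODM training.
import numpy as np
arm_probs = np.array([[0.995, 0.005],   # confident background -> ignored
                      [0.600, 0.400],   # kept
                      [0.100, 0.900]])  # kept
keep = arm_probs[:, 1] >= 0.01          # matches 'objectness_score' above
# keep == array([False, True, True])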

# The creation of the test network, the export of the network definition
# files, etc. are omitted here.
......

2. Reference

  1. RefineDet source code analysis (1): the training script