網絡結構圖
基礎網絡塊可自定義,可使用VGG、ResNet、DenseNet等;這裡的基礎網絡由三個下採樣卷積塊串接而成(輸出通道數依次為16、32、64),再接上SSD的多尺度預測結構。
多尺度特徵塊:先用兩層3×3卷積(padding=1,每層後接批量歸一化和ReLU)保持特徵圖的高寬不變,再用2×2的最大池化(程式碼中為nn.MaxPool2D(2))將特徵圖的高寬減半,以提取不同尺度的特徵。
類別、邊界框預測:各用一個3×3卷積層,以輸出通道來編碼預測結果——類別預測輸出 num_anchors × (num_classes + 1) 個通道,邊界框預測輸出 num_anchors × 4 個通道。
檔案名:SSD_model.py
import sys
sys.path.insert(0, '..')
import gluonbook as gb
from mxnet import autograd, contrib, gluon, image, init, nd
from mxnet.gluon import loss as gloss, nn
import time
def cls_predictor(num_anchors, num_classes):
    """Build the class-prediction layer applied to one feature map.

    Args:
        num_anchors: Number of anchor boxes per spatial position.
        num_classes: Number of object classes; one extra slot per anchor is
            added on top of this (presumably the background class).

    Returns:
        A 3x3 ``nn.Conv2D`` with padding 1 and
        ``num_anchors * (num_classes + 1)`` output channels.
    """
    out_channels = num_anchors * (num_classes + 1)
    return nn.Conv2D(out_channels, kernel_size=3, padding=1)
def bbox_predictor(num_anchors):
    """Build the bounding-box prediction layer applied to one feature map.

    Args:
        num_anchors: Number of anchor boxes per spatial position.

    Returns:
        A 3x3 ``nn.Conv2D`` with padding 1 and ``num_anchors * 4`` output
        channels (four values per anchor).
    """
    values_per_anchor = 4
    out_channels = num_anchors * values_per_anchor
    return nn.Conv2D(out_channels, kernel_size=3, padding=1)
def forward(x, block):
    """Initialize ``block`` and return the result of calling it on ``x``.

    Args:
        x: Input passed to the block.
        block: Object exposing ``initialize()`` and being callable on ``x``.

    Returns:
        Whatever ``block(x)`` returns.
    """
    block.initialize()
    output = block(x)
    return output
def flatten_pred(pred):
    """Move axis 1 of a 4-D prediction to the end, then flatten it.

    Args:
        pred: 4-D prediction array (presumably batch, channels, height,
            width -- confirm against the predictor layers' output).

    Returns:
        The result of ``flatten()`` on the transposed array.
    """
    # Axis order (0, 2, 3, 1) puts the original axis 1 last.
    channels_last = pred.transpose((0, 2, 3, 1))
    return channels_last.flatten()
def concat_preds(preds):
    """Flatten every prediction and join them along axis 1.

    Args:
        preds: Iterable of prediction arrays, each accepted by
            ``flatten_pred``.

    Returns:
        One array produced by ``nd.concat(..., dim=1)`` over the flattened
        predictions.
    """
    flattened = [flatten_pred(single) for single in preds]
    return nd.concat(*flattened, dim=1)
def down_sample_blk(num_channels):
    """Build a block of two conv/batch-norm/ReLU groups and a 2x2 max pool.

    Args:
        num_channels: Output channel count of both convolutions (also passed
            to ``BatchNorm`` as ``in_channels``).

    Returns:
        An ``nn.Sequential`` holding, in order: two repetitions of
        (3x3 ``Conv2D`` with padding 1, ``BatchNorm``, ReLU ``Activation``)
        followed by ``MaxPool2D(2)``.
    """
    block = nn.Sequential()
    for _ in range(2):
        conv = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        norm = nn.BatchNorm(in_channels=num_channels)
        relu = nn.Activation('relu')
        block.add(conv, norm, relu)
    pool = nn.MaxPool2D(2)
    block.add(pool)
    return block
def body_blk():
    """Build the base network: three down-sampling blocks in sequence.

    Returns:
        An ``nn.Sequential`` of ``down_sample_blk`` instances with 16, 32
        and 64 channels, in that order.
    """
    base = nn.Sequential()
    stages = [down_sample_blk(width) for width in (16, 32, 64)]
    base.add(*stages)
    return base
def get_blk(i):
    """Return the feature block used at stage ``i``.

    Args:
        i: Stage index.

    Returns:
        ``body_blk()`` for stage 0, ``nn.GlobalMaxPool2D()`` for stage 4,
        and ``down_sample_blk(128)`` for every other index.
    """
    if i == 0:
        return body_blk()
    if i == 4:
        return nn.GlobalMaxPool2D()
    return down_sample_blk(128)
def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
    """Run one stage: compute features, anchors and both predictions.

    Args:
        X: Input to the stage's feature block.
        blk: Feature block, called as ``blk(X)``.
        size: Passed to ``MultiBoxPrior`` as ``sizes``.
        ratio: Passed to ``MultiBoxPrior`` as ``ratios``.
        cls_predictor: Callable producing class predictions from the
            feature map.
        bbox_predictor: Callable producing bounding-box predictions from the
            feature map.

    Returns:
        Tuple ``(feature_map, anchors, cls_pred, bbox_pred)``.
    """
    feature_map = blk(X)
    anchors = contrib.ndarray.MultiBoxPrior(
        feature_map, sizes=size, ratios=ratio)
    # Both predictors read the same feature map; classes are predicted first.
    return (feature_map, anchors, cls_predictor(feature_map),
            bbox_predictor(feature_map))
# Anchor sizes, one pair per stage (five stages).
sizes = [
    [0.2, 0.272],
    [0.37, 0.447],
    [0.54, 0.619],
    [0.71, 0.79],
    [0.88, 0.961],
]
# Every stage uses the same three aspect ratios.
ratios = [[1, 2, 0.5]] * len(sizes)
# Anchors per position: (number of sizes) + (number of ratios) - 1.
num_anchors = len(sizes[0]) + len(ratios[0]) - 1
class TinySSD(nn.Block):
    """Five-stage single-shot detector built from the helpers in this file.

    Each stage ``i`` (0..4) owns three child blocks stored as attributes:
    ``blk_i`` (feature block from ``get_blk``), ``cls_i`` (class predictor)
    and ``bbox_i`` (bounding-box predictor).

    Args:
        num_classes: Number of object classes to predict.
        **kwargs: Forwarded to ``nn.Block.__init__``.
    """

    def __init__(self, num_classes, **kwargs):
        super(TinySSD, self).__init__(**kwargs)
        self.num_classes = num_classes
        for stage in range(5):
            # Assigning via setattr creates self.blk_<i>, self.cls_<i> and
            # self.bbox_<i> for each stage.
            feature_blk = get_blk(stage)
            setattr(self, 'blk_%d' % stage, feature_blk)
            class_head = cls_predictor(num_anchors, num_classes)
            setattr(self, 'cls_%d' % stage, class_head)
            box_head = bbox_predictor(num_anchors)
            setattr(self, 'bbox_%d' % stage, box_head)

    def forward(self, X):
        """Run all five stages and merge their outputs.

        Args:
            X: Network input; each stage's feature map becomes the next
                stage's input.

        Returns:
            Tuple ``(anchors, cls_preds, bbox_preds)``: anchors concatenated
            along axis 1, class predictions reshaped so the last axis has
            ``num_classes + 1`` entries, and the concatenated bounding-box
            predictions.
        """
        anchors, cls_preds, bbox_preds = [], [], []
        for stage in range(5):
            X, stage_anchors, stage_cls, stage_bbox = blk_forward(
                X,
                getattr(self, 'blk_%d' % stage),
                sizes[stage],
                ratios[stage],
                getattr(self, 'cls_%d' % stage),
                getattr(self, 'bbox_%d' % stage))
            anchors.append(stage_anchors)
            cls_preds.append(stage_cls)
            bbox_preds.append(stage_bbox)
        # The anchors produced by every stage have to be joined together.
        all_anchors = nd.concat(*anchors, dim=1)
        all_cls = concat_preds(cls_preds).reshape(
            (0, -1, self.num_classes + 1))
        all_bbox = concat_preds(bbox_preds)
        return (all_anchors, all_cls, all_bbox)
訓練
檔案名:SSD_run.py
from SSD_model import *
from load_my_data import load_my_data
import time
import os

# NOTE(review): `path` is not referenced anywhere in the visible script; it is
# kept because code outside this view may still read it.
path = './VOCtemplate/VOC2012/Annotations/'
batch_size, edge_size = 4, 256
train_data = load_my_data(batch_size, edge_size)
# NOTE(review): label_shape=(3, 5) presumably means up to 3 objects per image
# with 5 values each -- confirm against load_my_data.
train_data.reshape(label_shape=(3, 5))

net = TinySSD(num_classes=5)
# Resume from the checkpoint written by the training loop when it exists;
# otherwise (e.g. on the very first run) fall back to Xavier initialization
# instead of crashing on the missing file.
if os.path.isfile('my_model.params'):
    net.load_parameters('my_model.params')
else:
    net.initialize(init=init.Xavier())

trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.2, 'wd': 5e-4})
cls_loss = gloss.SoftmaxCrossEntropyLoss()
bbox_loss = gloss.L1Loss()
def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    """Return the sum of the class loss and the masked bounding-box loss.

    Args:
        cls_preds: Class predictions passed to ``cls_loss``.
        cls_labels: Class labels passed to ``cls_loss``.
        bbox_preds: Bounding-box predictions.
        bbox_labels: Bounding-box labels.
        bbox_masks: Mask multiplied element-wise into both the box
            predictions and the box labels before ``bbox_loss``.

    Returns:
        ``cls_loss(...) + bbox_loss(...)``.
    """
    class_term = cls_loss(cls_preds, cls_labels)
    masked_preds = bbox_preds * bbox_masks
    masked_labels = bbox_labels * bbox_masks
    box_term = bbox_loss(masked_preds, masked_labels)
    return class_term + box_term
def cls_eval(cls_preds, cls_labels):
    """Return the mean agreement between predicted and labelled classes.

    Args:
        cls_preds: Class predictions with the class scores on the last axis.
        cls_labels: Class labels compared against the arg-max predictions.

    Returns:
        Scalar mean of the element-wise equality, via ``asscalar()``.
    """
    # Class scores live on the last axis, so arg-max is taken over axis -1.
    predicted = cls_preds.argmax(axis=-1)
    matches = predicted == cls_labels
    return matches.mean().asscalar()
def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
    """Return the mean absolute masked bounding-box error.

    Args:
        bbox_preds: Bounding-box predictions.
        bbox_labels: Bounding-box labels.
        bbox_masks: Mask multiplied element-wise into the difference.

    Returns:
        Scalar mean of ``|(bbox_labels - bbox_preds) * bbox_masks|``, via
        ``asscalar()``.
    """
    masked_error = (bbox_labels - bbox_preds) * bbox_masks
    return masked_error.abs().mean().asscalar()
# Single pass over the training data: metrics are reported every LOG_INTERVAL
# steps and the parameters are checkpointed every SAVE_INTERVAL steps.
LOG_INTERVAL = 10
SAVE_INTERVAL = 100

acc, mae = 0, 0
steps_since_save = 0
train_data.reset()  # Read the data from the beginning.
start = time.time()
for i, batch in enumerate(train_data):
    X = batch.data[0]
    Y = batch.label[0]
    with autograd.record():
        # Generate multi-scale anchors and predict a class and an offset for
        # each anchor.
        anchors, cls_preds, bbox_preds = net(X)
        # Label each anchor with a class and an offset.
        bbox_labels, bbox_masks, cls_labels = contrib.nd.MultiBoxTarget(
            anchors, Y, cls_preds.transpose((0, 2, 1)))
        # Compute the loss from the predicted and labelled classes/offsets.
        l = calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels,
                      bbox_masks)
    l.backward()
    trainer.step(batch_size)
    steps_since_save += 1
    acc += cls_eval(cls_preds, cls_labels)
    mae += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
    if (i + 1) % LOG_INTERVAL == 0:
        # acc and mae are sums over the last LOG_INTERVAL steps; report their
        # averages, then start a fresh window.
        print('step %2d, class err %.2e, bbox mae %.2e, time %.1f sec' % (
            i + 1, 1 - acc / LOG_INTERVAL, mae / LOG_INTERVAL,
            time.time() - start))
        acc, mae = 0, 0
        start = time.time()
    if (i + 1) % SAVE_INTERVAL == 0:
        print('writing params......')
        net.save_parameters('my_model.params')
        steps_since_save = 0

# Persist the steps taken after the last periodic checkpoint; without this a
# run with fewer than SAVE_INTERVAL batches (or any trailing remainder) would
# never be written to disk.
if steps_since_save:
    print('writing params......')
    net.save_parameters('my_model.params')
開始訓練: