應用場景
由于業務需求,需要對部分不符合檢測結果的圖像進行過濾,是以需要對之前的檢測項目進行優化。常見問題有如下亮點:
- 圖像中檢測目标是傾斜角度;
-
圖像中是通過鏡子自拍或者加了濾鏡處理後的相片;
這兩種情況是由于訓練樣本中含有這兩種情況的少,是以需要增加此類樣本數。本文隻針對第一種情況進行資料增強,解決辦法——旋轉。
素材
項目是對服裝進行檢測,樣本圖(來源于使用者曬圖):

其對應的xml檔案:
<annotation>
<folder>well</folder>
<filename>15278480618780.jpg</filename>
<path>15278480618780.jpg</path>
<size>
<width>828</width>
<height>1104</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>3</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>250</xmin>
<ymin>672</ymin>
<xmax>531</xmax>
<ymax>1104</ymax>
</bndbox>
</object>
</annotation>
從xml資訊中可以看見圖像的具體資訊,包括圖像名稱,尺寸以及檢測方框的坐标範圍。
處理程式
這裡介紹處理批量處理檔案夾中的情形,單張圖像處理類似。
處理思想
- 讀取對應的圖像,解析對應的xml,根據旋轉的角度來變換之前檢測到的坐标,以及儲存變換後的圖像。
處理代碼
#!/usr/bin/env python
import cv2
import math
import numpy as np
import os
import pdb
import xml.etree.ElementTree as ET
class ImgAugemention():
def __init__(self):
self.angle = 90
# rotate_img
def rotate_image(self, src, angle, scale=1.):
w = src.shape[1]
h = src.shape[0]
# convet angle into rad
rangle = np.deg2rad(angle) # angle in radians
# calculate new image width and height
nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
# ask OpenCV for the rotation matrix
rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
# calculate the move from the old center to the new center combined
# with the rotation
rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
# the move only affects the translation, so update the translation
# part of the transform
rot_mat[0, 2] += rot_move[0]
rot_mat[1, 2] += rot_move[1]
# map
return cv2.warpAffine(
src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
flags=cv2.INTER_LANCZOS4)
def rotate_xml(self, src, xmin, ymin, xmax, ymax, angle, scale=1.):
w = src.shape[1]
h = src.shape[0]
rangle = np.deg2rad(angle) # angle in radians
# now calculate new image width and height
# get width and heigh of changed image
nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
# ask OpenCV for the rotation matrix
rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
# calculate the move from the old center to the new center combined
# with the rotation
rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
# the move only affects the translation, so update the translation
# part of the transform
rot_mat[0, 2] += rot_move[0]
rot_mat[1, 2] += rot_move[1]
# rot_mat: the final rot matrix
# get the four center of edges in the initial martix,and convert the coord
point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1]))
point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1]))
point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1]))
point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1]))
# concat np.array
concat = np.vstack((point1, point2, point3, point4))
# change type
concat = concat.astype(np.int32)
print(concat)
rx, ry, rw, rh = cv2.boundingRect(concat)
return rx, ry, rw, rh
def process_img(self, imgs_path, xmls_path, img_save_path, xml_save_path, angle_list):
# assign the rot angles
for angle in angle_list:
for img_name in os.listdir(imgs_path):
# split filename and suffix
n, s = os.path.splitext(img_name)
# for the sake of use yolo model, only process '.jpg'
if s == ".jpg":
img_path = os.path.join(imgs_path, img_name)
img = cv2.imread(img_path)
rotated_img = self.rotate_image(img, angle)
save_name = n + "_" + str(angle) + "d.jpg"
# 寫入圖像
cv2.imwrite(img_save_path + save_name, rotated_img)
print("log: [%sd] %s is processed." % (angle, img))
xml_url = img_name.split('.')[0] + '.xml'
xml_path = os.path.join(xmls_path, xml_url)
tree = ET.parse(xml_path)
file_name = tree.find('filename').text # it is origin name
path = tree.find('path').text # it is origin path
# change name and path
tree.find('filename').text = save_name # change file name to rot degree name
tree.find('path').text = save_name # change file path to rot degree name
root = tree.getroot()
for box in root.iter('bndbox'):
xmin = float(box.find('xmin').text)
ymin = float(box.find('ymin').text)
xmax = float(box.find('xmax').text)
ymax = float(box.find('ymax').text)
x, y, w, h = self.rotate_xml(img, xmin, ymin, xmax, ymax, angle)
# change the coord
box.find('xmin').text = str(x)
box.find('ymin').text = str(y)
box.find('xmax').text = str(x+w)
box.find('ymax').text = str(y+h)
box.set('updated', 'yes')
# write into new xml
tree.write(xml_save_path + n + "_" + str(angle) + "d.xml")
print("[%s] %s is processed." % (angle, img_name))
if __name__ == '__main__':
img_aug = ImgAugemention()
imgs_path = './image/'
xmls_path = './xml/'
img_save_path = './rotate/'
xml_save_path = './xml_rot/'
angle_list = [60, 90, 120, 150, 210, 240, 300]
img_aug.process_img(imgs_path, xmls_path, img_save_path, xml_save_path, angle_list)
處理結果
- 旋轉60度
<annotation>
<folder>well</folder>
<filename>15278480618780_60d.jpg</filename>
<path>15278480618780_60d.jpg</path>
<size>
<width>828</width>
<height>1104</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>3</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox updated="yes">
<xmin>777</xmin>
<ymin>701</ymin>
<xmax>1152</xmax>
<ymax>945</ymax>
</bndbox>
</object>
</annotation>
- 旋轉90度
<annotation>
<folder>well</folder>
<filename>15278480618780_90d.jpg</filename>
<path>15278480618780_90d.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>828</width>
<height>1104</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>3</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox updated="yes">
<xmin>672</xmin>
<ymin>297</ymin>
<xmax>1105</xmax>
<ymax>579</ymax>
</bndbox>
</object>
</annotation>