
2021-07-04  Converting between the COCO and VOC data formats, with visualization code

Table of Contents

  • Preface
  • 1. COCO to VOC
  • 2. VOC to COCO
    • 1. First convert VOC to CSV
    • 2. Convert the CSV to COCO format
  • 3. Visualizing COCO-format data
  • 4. Visualizing VOC-format data
  • Summary

Preface

Different network models expect their data in different formats. This post mainly records how to convert between the COCO and VOC formats, and how to visualize data in each format.

Note: the content below is the main body of this article and is provided for reference.

1. COCO to VOC

The code below converts COCO-format annotations into VOC-format XML files. To use it, you only need to change the paths; a sample invocation and a quick check of the output follow the code.

import argparse, json
import cytoolz
from lxml import etree, objectify
import os, re


def instance2xml_base(anno):
    E = objectify.ElementMaker(annotate=False)
    anno_tree = E.annotation(
        E.folder('VOC2014_instance/{}'.format(anno['category_id'])),
        E.filename(anno['file_name']),
        E.source(
            E.database('MS COCO 2014'),
            E.annotation('MS COCO 2014'),
            E.image('Flickr'),
            # E.url(anno['coco_url'])
        ),
        E.size(
            E.width(anno['width']),
            E.height(anno['height']),
            E.depth(3)
        ),
        E.segmented(0),
    )
    return anno_tree


def instance2xml_bbox(anno, bbox_type='xyxy'):
    """bbox_type: xyxy (xmin, ymin, xmax, ymax); xywh (xmin, ymin, width, height)"""
    assert bbox_type in ['xyxy', 'xywh']
    if bbox_type == 'xyxy':
        xmin, ymin, w, h = anno['bbox']
        xmax = xmin + w
        ymax = ymin + h
    else:
        xmin, ymin, xmax, ymax = anno['bbox']
    E = objectify.ElementMaker(annotate=False)
    anno_tree = E.object(
        E.name(anno['category_id']),
        E.bndbox(
            E.xmin(xmin),
            E.ymin(ymin),
            E.xmax(xmax),
            E.ymax(ymax)
        ),
        E.difficult(anno['iscrowd'])
    )
    return anno_tree


def parse_instance(content, outdir):
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        # if one file have multiple different objects, save it in each category sub-directory
        filenames = []
        for group in groups:
            filenames.append(os.path.join(outdir, re.sub(" ", "_", group['category_id']),
                                          os.path.splitext(name)[0] + ".xml"))
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))


def keypoints2xml_base(anno):
    annotation = etree.Element("annotation")
    etree.SubElement(annotation, "folder").text = "VOC2014_keypoints"
    etree.SubElement(annotation, "filename").text = anno['file_name']
    source = etree.SubElement(annotation, "source")
    etree.SubElement(source, "database").text = "MS COCO 2014"
    etree.SubElement(source, "annotation").text = "MS COCO 2014"
    etree.SubElement(source, "image").text = "Flickr"
    etree.SubElement(source, "url").text = anno['coco_url']
    size = etree.SubElement(annotation, "size")
    etree.SubElement(size, "width").text = str(anno["width"])
    etree.SubElement(size, "height").text = str(anno["height"])
    etree.SubElement(size, "depth").text = '3'
    etree.SubElement(annotation, "segmented").text = '0'
    return annotation


def keypoints2xml_object(anno, xmltree, keypoints_dict, bbox_type='xyxy'):
    assert bbox_type in ['xyxy', 'xywh']
    if bbox_type == 'xyxy':
        xmin, ymin, w, h = anno['bbox']
        xmax = xmin + w
        ymax = ymin + h
    else:
        xmin, ymin, xmax, ymax = anno['bbox']
    key_object = etree.SubElement(xmltree, "object")
    etree.SubElement(key_object, "name").text = anno['category_id']
    bndbox = etree.SubElement(key_object, "bndbox")
    etree.SubElement(bndbox, "xmin").text = str(xmin)
    etree.SubElement(bndbox, "ymin").text = str(ymin)
    etree.SubElement(bndbox, "xmax").text = str(xmax)
    etree.SubElement(bndbox, "ymax").text = str(ymax)
    etree.SubElement(key_object, "difficult").text = '0'
    keypoints = etree.SubElement(key_object, "keypoints")
    for i in range(0, len(keypoints_dict)):
        keypoint = etree.SubElement(keypoints, keypoints_dict[i + 1])
        etree.SubElement(keypoint, "x").text = str(anno['keypoints'][i * 3])
        etree.SubElement(keypoint, "y").text = str(anno['keypoints'][i * 3 + 1])
        etree.SubElement(keypoint, "v").text = str(anno['keypoints'][i * 3 + 2])
    return xmltree


def parse_keypoints(content, outdir):
    keypoints = dict(
        zip(range(1, len(content['categories'][0]['keypoints']) + 1), content['categories'][0]['keypoints']))
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))
    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"
    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0] + ".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
        doc = etree.ElementTree(anno_tree)
        doc.write(filename, pretty_print=True)  # lxml writes bytes, so pass the filename instead of a text-mode file object
        print("Formatting keypoints xml file {} done!".format(name))


def main(args):
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    content = json.load(open(args.anno_file, 'r'))
    if args.type == 'instance':
        # make subdirectories
        sub_dirs = [re.sub(" ", "_", cate['name']) for cate in content['categories']]
        for sub_dir in sub_dirs:
            sub_dir = os.path.join(args.output_dir, str(sub_dir))
            if not os.path.exists(sub_dir):
                os.makedirs(sub_dir)
        parse_instance(content, args.output_dir)
    elif args.type == 'keypoint':
        parse_keypoints(content, args.output_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--anno_file", default='./annotations.json',
                        help="annotation file for object instance/keypoint")
    parser.add_argument("--type", default='instance', type=str, help="object instance or keypoint",
                        choices=['instance', 'keypoint'])
    parser.add_argument("--output_dir", default='./xml', help="output directory for voc annotation xml file")
    args = parser.parse_args()
    main(args)
           
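Assuming the script above is saved as, say, coco2voc.py (the name is just an example), it can be run with python coco2voc.py --anno_file ./annotations.json --type instance --output_dir ./xml. This creates one sub-directory per category under ./xml and writes one XML file per image into each category that appears in that image. A minimal sketch for spot-checking one of the generated files (the path below is a hypothetical example):

from lxml import etree

# Hypothetical path to one generated annotation file; point it at a file that actually exists
xml_path = './xml/particle/000001.xml'
tree = etree.parse(xml_path)
size = tree.find('size')
print('image size:', size.findtext('width'), 'x', size.findtext('height'))
# Print every bounding box to confirm the conversion looks right
for obj in tree.findall('object'):
    box = obj.find('bndbox')
    print(obj.findtext('name'),
          [box.findtext(tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')])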

2. VOC to COCO

1. First convert VOC to CSV

The code is as follows:

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET


def xml_to_csv(path):
    xml_list = []
    #i = 0
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        root.find('filename').text = os.path.basename(xml_file).replace('xml', 'jpg')
        for member in root.findall('object'):
            # assumes the standard VOC layout: <name> is the first child of <object>
            # and <bndbox> (xmin, ymin, xmax, ymax) is the fifth
            value = (root.find('filename').text,
                     int(root.find('size')[1].text),   # height
                     int(root.find('size')[0].text),   # width
                     int(member[4][0].text),           # xmin
                     int(member[4][1].text),           # ymin
                     int(member[4][2].text),           # xmax
                     int(member[4][3].text),           # ymax
                     member[0].text,                   # class name
                     #i,
                     #((int(member[4][0].text)+int(member[4][2].text))/2),  # optional: box center x
                     #((int(member[4][1].text)+int(member[4][3].text))/2)   # optional: box center y
                     )
            xml_list.append(value)
            #i = i + 1
    column_name = ['filename', 'height', 'width', 'xmin', 'ymin', 'xmax', 'ymax', 'class']  # optionally add 'X_center', 'Y_center', 'id'
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df


def main():
    # 'Annotations' is the folder that holds the VOC XML files
    image_path = os.path.join(os.getcwd(), 'Annotations')
    xml_df = xml_to_csv(image_path)
    xml_df.to_csv('all_lab.csv', index=None)
    print('Successfully converted xml to csv.')


if __name__ == '__main__':
    main()
           
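Before moving on, it is worth sanity-checking the generated all_lab.csv; a minimal sketch, assuming pandas is already installed:

import pandas as pd

# Load the CSV produced above and confirm the column layout and row count
df = pd.read_csv('all_lab.csv')
print(df.columns.tolist())  # expected: ['filename', 'height', 'width', 'xmin', 'ymin', 'xmax', 'ymax', 'class']
print(len(df), 'annotation rows (one row per object, not per image)')
print(df.head())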

2. Convert the CSV to COCO format

The code is as follows:

# -*- coding: utf-8 -*-
import os
import json
import shutil

import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

np.random.seed(41)
# 0 is reserved for the background class
classname_to_id = {"particle": 1}  # change this to your own classes and their corresponding IDs

class Csv2CoCo:

    def __init__(self, image_dir, total_annos):
        self.images = []
        self.annotations = []
        self.categories = []
        self.img_id = 0
        self.ann_id = 0
        self.image_dir = image_dir      # directory that holds the images
        self.total_annos = total_annos  # annotations as {filename: array of rows}

    # save the COCO dict as a JSON file
    def save_coco_json(self, instance, save_path):
        json.dump(instance, open(save_path, 'w'), ensure_ascii=False, indent=2)  # indent=2 for readable output
    # build the COCO structure from the per-image annotations
    def to_coco(self, keys):
        self._init_categories()  # write the category list
        for key in keys:
            self.images.append(self._image(key))  # build the 'image' entry for this file
            shapes = self.total_annos[key]
            for shape in shapes:
                bboxi = []
                for cor in shape[2:-1]:  # columns xmin, ymin, xmax, ymax
                    bboxi.append(int(cor))
                label = shape[-1]        # the last column is the class name
                annotation = self._annotation(bboxi, label, key)
                self.annotations.append(annotation)
                self.ann_id += 1  # annotation ids count up from 0
            self.img_id += 1      # image ids count up from 0
        instance = {}
        instance['info'] = 'spytensor created'
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance

    # build the category list
    def _init_categories(self):
        for k, v in classname_to_id.items():
            category = {}
            category['id'] = v    # category id
            category['name'] = k  # class name
            self.categories.append(category)

    # build a COCO 'image' entry
    def _image(self, path):
        image = {}
        img = cv2.imread(self.image_dir + path)  # read the image only to get its size
        image['file_name'] = path                # image filename
        image['height'] = img.shape[0]
        image['width'] = img.shape[1]
        image['id'] = self.img_id                # image id, counted from 0
        return image

    # build a COCO 'annotation' entry
    def _annotation(self, shape, label, file_name):
        points = shape[:4]
        annotation = {}
        annotation['segmentation'] = self._get_seg(points)
        annotation['area'] = self._get_area(points)
        annotation['iscrowd'] = 0
        annotation['image_id'] = self.img_id                # id of the image this box belongs to
        annotation['bbox'] = self._get_box(points)
        annotation['category_id'] = classname_to_id[label]  # map the class name to its category id
        annotation['id'] = self.ann_id                      # id of this annotation
        return annotation

    # COCO bbox format: [x, y, w, h]
    def _get_box(self, points):
        min_x = points[0]
        min_y = points[1]
        max_x = points[2]
        max_y = points[3]
        return [min_x, min_y, max_x - min_x, max_y - min_y]

    # box area
    def _get_area(self, points):
        min_x = points[0]
        min_y = points[1]
        max_x = points[2]
        max_y = points[3]
        return (max_x - min_x + 1) * (max_y - min_y + 1)

    # segmentation is left empty here; the commented-out line builds a rough polygon from the box
    def _get_seg(self, points):
        min_x = points[0]
        min_y = points[1]
        max_x = points[2]
        max_y = points[3]
        h = max_y - min_y
        w = max_x - min_x
        a = []
        # a.append([min_x,min_y, min_x,min_y+0.5*h, min_x,max_y, min_x+0.5*w,max_y, max_x,max_y, max_x,max_y-0.5*h, max_x,min_y, max_x-0.5*w,min_y])
        return a
   
if __name__ == '__main__':
    csv_file = "all_lab.csv"     # CSV file generated in the previous step
    image_dir = "./JPEGImages/"  # directory that holds the images
    saved_coco_path = "./"       # where the COCO-format output is written
    # collect the CSV annotations into a dict: {filename: array of annotation rows}
    total_csv_annotations = {}
    # header=0: the first row of all_lab.csv is the column header written in the previous step
    annotations = tqdm(pd.read_csv(csv_file, header=0).values)
    for annotation in annotations:
        key = annotation[0].split(os.sep)[-1]  # key is the image filename
        value = np.array([annotation[1:]])     # value is the annotation row without the filename
        if key in total_csv_annotations.keys():
            # stack multiple boxes that belong to the same image
            total_csv_annotations[key] = np.concatenate((total_csv_annotations[key], value), axis=0)
        else:
            total_csv_annotations[key] = value
    # split the data by image filename
    total_keys = list(total_csv_annotations.keys())  # all image filenames, e.g. 000000.jpg
    # split off a test set (10%), then a validation set (10% of the remainder)
    trainval_keys, test_keys = train_test_split(total_keys, test_size=0.1)
    train_keys, val_keys = train_test_split(trainval_keys, test_size=0.1)
    print("train_n:", len(train_keys), 'val_n:', len(val_keys), 'test_n:', len(test_keys))
    # create the required folders
    if not os.path.exists('%scoco/annotations/' % saved_coco_path):
        os.makedirs('%scoco/annotations/' % saved_coco_path)
    if not os.path.exists('%scoco/train2017/' % saved_coco_path):
        os.makedirs('%scoco/train2017/' % saved_coco_path)
    if not os.path.exists('%scoco/val2017/' % saved_coco_path):
        os.makedirs('%scoco/val2017/' % saved_coco_path)
    if not os.path.exists('%scoco/test2017/' % saved_coco_path):
        os.makedirs('%scoco/test2017/' % saved_coco_path)
    # convert the training set to a COCO json file
    l2c_train = Csv2CoCo(image_dir=image_dir, total_annos=total_csv_annotations)
    train_instance = l2c_train.to_coco(train_keys)
    l2c_train.save_coco_json(train_instance, '%scoco/annotations/instances_train2017.json' % saved_coco_path)
    # copy the original images into the train2017/val2017/test2017 folders
    for file in train_keys:
        shutil.copy(image_dir + file, "%scoco/train2017/" % saved_coco_path)
    for file in val_keys:
        shutil.copy(image_dir + file, "%scoco/val2017/" % saved_coco_path)
    for file in test_keys:
        shutil.copy(image_dir + file, "%scoco/test2017/" % saved_coco_path)
    # convert the validation and test sets to COCO json files
    l2c_val = Csv2CoCo(image_dir=image_dir, total_annos=total_csv_annotations)
    val_instance = l2c_val.to_coco(val_keys)
    l2c_val.save_coco_json(val_instance, '%scoco/annotations/instances_val2017.json' % saved_coco_path)
    l2c_test = Csv2CoCo(image_dir=image_dir, total_annos=total_csv_annotations)
    test_instance = l2c_test.to_coco(test_keys)
    l2c_test.save_coco_json(test_instance, '%scoco/annotations/instances_test2017.json' % saved_coco_path)

           
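Once the three JSON files have been written, they can be loaded back with pycocotools (the same library used for visualization in the next section) as a quick consistency check; a minimal sketch:

from pycocotools.coco import COCO

# Load the generated training annotations and print some basic statistics
coco = COCO('./coco/annotations/instances_train2017.json')
print('images:', len(coco.getImgIds()))
print('annotations:', len(coco.getAnnIds()))
print('categories:', coco.loadCats(coco.getCatIds()))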

3. Visualizing COCO-format data

import os
import cv2

from pycocotools.coco import COCO

json_file = r'F:\data\tianchi\DATA\annotations.json'
dataset_dir = r'F:\data\tianchi\DATA\images/'
coco = COCO(json_file)
catIds = coco.getCatIds(catNms=['particles'])  # catIds=1 corresponds to the 'particle' class
imgIds = coco.getImgIds(catIds=catIds)  # image ids that contain this category (many values)
imgIds = [1219, 1220, 1221, 1222, 1223]  # manually pick a few image ids to display
for i in range(len(imgIds)):
    img = coco.loadImgs(imgIds[i])[0]

    image = cv2.imread(dataset_dir + img['file_name'])
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
    annos = coco.loadAnns(annIds)

    # draw every bbox of this image; COCO bboxes are stored as [x, y, w, h]
    anno_image = image
    for anno in annos:
        x, y, w, h = anno['bbox']
        anno_image = cv2.rectangle(anno_image, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 255), 2)

    # cv2.imshow(window_name, image): the window name is required, otherwise an error is raised
    cv2.namedWindow('demo', cv2.WINDOW_NORMAL)
    cv2.imshow('demo', anno_image)
    cv2.waitKey(1000)
           

4. Visualizing VOC-format data

import os
import cv2
import re

pattens = ['name', 'xmin', 'ymin', 'xmax', 'ymax']


def get_annotations(xml_path):
    bbox = []
    with open(xml_path, 'r') as f:
        # flatten the file to a single line so the regexes below can match across line breaks
        text = f.read().replace('\n', 'return')
        p1 = re.compile(r'(?<=<object>)(.*?)(?=</object>)')
        result = p1.findall(text)
        for obj in result:
            tmp = []
            for patten in pattens:
                p = re.compile(r'(?<=<{}>)(.*?)(?=</{}>)'.format(patten, patten))
                if patten == 'name':
                    tmp.append(p.findall(obj)[0])
                else:
                    tmp.append(int(float(p.findall(obj)[0])))
            bbox.append(tmp)
    return bbox


def save_viz_image(image_path, xml_path, save_path):
    bbox = get_annotations(xml_path)
    image = cv2.imread(image_path)
    for info in bbox:
        # info = [name, xmin, ymin, xmax, ymax]
        cv2.rectangle(image, (info[1], info[2]), (info[3], info[4]), (255, 0, 0), thickness=2)
        cv2.putText(image, info[0], (info[1], info[2]), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), 2)
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    cv2.imwrite(os.path.join(save_path, os.path.basename(image_path)), image)


if __name__ == '__main__':
    image_dir = 'F:/data/tianchi/DATA/images/'
    xml_dir = 'F:/data/tianchi/DATA/xml/particles/'
    save_dir = 'viz_images'
    image_list = os.listdir(image_dir)
    for i in image_list:
        image_path = os.path.join(image_dir, i)
        xml_path = os.path.join(xml_dir, i.replace('.jpg', '.xml'))
        save_viz_image(image_path, xml_path, save_dir)

           

Summary

Converting and visualizing data are important steps: they make sure the network starts training and tuning on a correctly prepared dataset, and they help catch errors at the data level early on.
