先導知識:
Python 标準庫之 xml.etree.ElementTree-如何生成xml tree
Python中XML的讀寫總結
python的xlrd包-讀寫excel檔案
1、首先了解xml檔案結構:
每個element對象都具有以下屬性:
- tag:string對象,表示資料代表的種類。
- attrib:dictionary對象,表示附有的屬性。
- text:string對象,表示element的内容。
- tail:string對象,表示element閉合之後的尾迹。
- 若幹子元素(child elements)。
<tag attrib1=1>text</tag>tail
  1     2        3         4
2、然後了解xlrd庫和xml.etree.ElementTree庫:
#data = xlrd.open_workbook('檔案名') 打開Excel檔案
#ret = data.sheet_names() 擷取所有工作表名
#sheet = data.sheet_by_name(ret[0]) 擷取名稱為ret[0]的工作表(ret是工作表名清單)
#sheet.nrows 擷取行數
#sheet.ncols 擷取列數
#sheet.cell_value(1,2) 擷取第1行第2列(索引從0開始)單元格資料
導入:import xml.etree.ElementTree as ET
XML是一種結構化資料形式,在ET中使用ElementTree代表整個XML文檔,并視其為一棵樹,Element代表這個文檔樹中的單個節點。
建立節點:root = ET.Element('Root')
建立子節點:child1 = ET.SubElement(root, 'child1')
建立文檔:tree = ET.ElementTree(root)
設定文本值:element.text = 'default'
設定屬性:element.set('age', str(i))
添加節點:root.append(element)
寫入文檔:tree.write('default.xml', encoding='utf-8', xml_declaration=True)
3、代碼:
import xml.etree.ElementTree as ET
import xlrd
# read data from a xlsx file and assign them to a list
# Read every cell of the "Sheet1" worksheet into a list of rows.
# The path is a raw string so the backslash after the drive letter is kept
# literally instead of being parsed as an (invalid) escape sequence.
ExcelFileName = r'E:\Photovoltaic power station inspection project/label.xlsx'
workbook = xlrd.open_workbook(ExcelFileName)
worksheet = workbook.sheet_by_name("Sheet1")
num_rows = worksheet.nrows
num_cols = worksheet.ncols

# result_data[r][c] is the value of the cell at row r, column c (0-based).
result_data = [
    [worksheet.cell_value(curr_row, curr_col) for curr_col in range(num_cols)]
    for curr_row in range(num_rows)
]

# Number of rows read from the sheet (header row included).
length = len(result_data)
# create the file structure
def __indent(elem, level=0):
    """Add newlines and tabs in place so the serialized tree is pretty-printed.

    elem  -- the Element to format. Its ``text``/``tail`` and those of its
             descendants are overwritten only when empty or whitespace-only.
    level -- nesting depth of ``elem``; each level contributes one tab.

    Returns None; the tree is modified in place.
    """
    i = "\n" + level * "\t"
    if len(elem):
        # Text before the first child: break the line and indent one level deeper.
        if not elem.text or not elem.text.strip():
            elem.text = i + "\t"
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for child in elem:
            __indent(child, level + 1)
        # The last child is followed by this element's closing tag, so its
        # tail goes back to this element's own indentation level.
        last_child = elem[-1]
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = i
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf below the root: only the tail needs formatting.
        elem.tail = i
# 生成xml檔案
# Class labels; the list position is the integer class id used to look a label up.
classes = [
    '無缺陷',
    '遮擋',
    '熱斑',
]

# Root <annotation> node of the XML document.
annotation = ET.Element('annotation')
def create_name_size(el, width=4056, height=3040, depth=3):
    """Append <width>, <height> and <depth> child nodes to ``el``.

    el     -- the Element (the <size> node) that receives the children.
    width  -- value written as the text of <width>; defaults to 4056.
    height -- value written as the text of <height>; defaults to 3040.
    depth  -- value written as the text of <depth>; defaults to 3.

    Values are converted with ``str`` before being stored. Returns None.
    """
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        ET.SubElement(el, tag).text = str(value)
# Row 0 of the sheet holds remarks; each processed row describes one image and
# produces one XML annotation file named after the row index.
# NOTE(review): the original comment says rows 1-193 are read, but the loop
# starts at row 173 -- confirm the intended start row.
# NOTE(review): assumes one file is written per row, including rows whose
# class id is 0 (those files simply contain no <object> nodes).
for i in range(173, length):
    row = result_data[i]

    # Build a fresh root for every image. Reusing a single shared root would
    # make each output file also contain the nodes of all earlier rows.
    annotation = ET.Element('annotation')

    size = ET.SubElement(annotation, 'size')
    folder = ET.SubElement(annotation, 'folder')
    folder.text = 'JPEGImages'
    filename = ET.SubElement(annotation, 'filename')
    filename.text = '00' + str(i) + '.JPG'
    path = ET.SubElement(annotation, 'path')
    # Raw string: the backslashes are literal path separators, not escapes.
    path.text = r'E:\Photovoltaic power station inspection project\VOCdevkit\VOC2012\JPEGImages/' + filename.text
    source = ET.SubElement(annotation, 'source')
    database = ET.SubElement(source, 'database')  # child of <source>
    database.text = 'Unknown'
    create_name_size(size)
    segmented = ET.SubElement(annotation, 'segmented')
    segmented.text = '0'

    # Column 1 is the class id; id 0 means there are no boxes to emit.
    class_id = int(row[1])
    if class_id != 0:
        # Column 2 is the number of boxes. Each box occupies four consecutive
        # columns (xmin, ymin, xmax, ymax) starting at column 3.
        for ob in range(int(row[2])):
            obj = ET.SubElement(annotation, 'object')
            ET.SubElement(obj, 'name').text = classes[class_id]
            ET.SubElement(obj, 'pose').text = 'Unspecified'
            ET.SubElement(obj, 'truncated').text = '0'
            ET.SubElement(obj, 'difficult').text = '0'
            bndbox = ET.SubElement(obj, 'bndbox')
            first_col = ob * 4 + 3
            for offset, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
                ET.SubElement(bndbox, tag).text = str(int(row[first_col + offset]))

    __indent(annotation)  # add line breaks and tabs before writing

    # Write this row's annotation to its own XML file.
    tree = ET.ElementTree(annotation)
    tree.write('00' + str(i) + '.xml', encoding='utf-8', xml_declaration=None)
參考:将Excel表格資料轉換成XML格式檔案