TensorFlow really is brutally hard, but there is no way around it: my research topic is deep learning and my framework of choice is TensorFlow, so I have to push through even with tears in my eyes. Below is a record of the past two days of struggle!
This article walks through the full TensorFlow Object Detection API workflow, building on many blog posts and other references. My thanks to those authors; the sources are listed at the end.
1. Dataset Annotation
First comes dataset annotation; the tool used is labelImg.
Click Open Dir to open the image directory, then click Create RectBox to draw a box around a target. After drawing, type the class label, click Save to store the annotation as an xml file, then click Next Image and annotate the next image, repeating until every image is annotated.
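For reference, labelImg writes Pascal VOC style xml. A minimal (hypothetical) annotation looks roughly like this; the element order inside object and bndbox is what the conversion script below relies on:

<annotation>
  <filename>1.jpg</filename>
  <size>
    <width>640</width>
    <height>480</height>
    <depth>3</depth>
  </size>
  <object>
    <name>ym</name>
    <pose>Unspecified</pose>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>48</xmin>
      <ymin>240</ymin>
      <xmax>195</xmax>
      <ymax>371</ymax>
    </bndbox>
  </object>
</annotation>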

Note that we need both a training set and a validation set, so create two separate folders, one named train and one named test.
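A minimal sketch of an 80/20 split, assuming all annotated images (.jpg) and their xml files sit together in one hypothetical source folder; adjust the paths to your own layout:

import os
import random
import shutil

src = 'G:\\dataset\\all'    # hypothetical folder holding all .jpg + .xml pairs
dst_root = 'G:\\dataset'    # train/ and test/ will be created here
random.seed(0)

images = [f for f in os.listdir(src) if f.lower().endswith('.jpg')]
random.shuffle(images)
split = int(0.8 * len(images))

for subset, names in (('train', images[:split]), ('test', images[split:])):
    out_dir = os.path.join(dst_root, subset)
    os.makedirs(out_dir, exist_ok=True)
    for name in names:
        base = os.path.splitext(name)[0]
        shutil.copy(os.path.join(src, name), out_dir)           # the image
        shutil.copy(os.path.join(src, base + '.xml'), out_dir)  # its annotation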
2. Converting the Dataset to TFRecord Format
First, convert the annotated xml files into a csv file:
'''
Only three places need to be changed: the first and second should be the
corresponding folder path, and the third the corresponding output file name,
here train.csv:
os.chdir('D:\\python3\\models-master\\research\\object_detection\\images\\train')
path = 'D:\\python3\\models-master\\research\\object_detection\\images\\train'
xml_df.to_csv('train.csv', index=None)
'''
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

# working directory containing the annotated xml files
os.chdir('G:\\資料集\\train')
path = 'G:\\資料集\\train'

def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = ("image" + root.find('filename').text,  # filename ("image" prefix added for this dataset)
                     int(root.find('size')[0].text),        # width
                     int(root.find('size')[1].text),        # height
                     member[0].text,                        # class name
                     int(member[4][0].text),                # xmin
                     int(member[4][1].text),                # ymin
                     int(member[4][2].text),                # xmax
                     int(member[4][3].text))                # ymax
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df

def main():
    image_path = path
    xml_df = xml_to_csv(image_path)
    xml_df.to_csv('train.csv', index=None)
    print('Successfully converted xml to csv.')

main()
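If everything worked, the first rows of train.csv should look roughly like this (values are illustrative; note the "image" prefix the script adds to each filename):

filename,width,height,class,xmin,ymin,xmax,ymax
image1.jpg,640,480,ym,48,240,195,371
image2.jpg,640,480,hg,102,85,300,420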
Under the models-master\research\object_detection directory, create a file named generate_TFR.py, and put the train.csv and test.csv from the previous step into the data folder.
"""
Usage:
# From tensorflow/models/
# Create train data:
python generate_TFR.py --csv_input=data/train.csv --output_path=data/train.record
# Create test data:
python generate_TFR.py --csv_input=data/test.csv --output_path=data/test.record
需要修改三處
os.chdir('D:\\python3\\models-master\\research\\object_detection\\')
path = os.path.join(os.getcwd(), 'images/train')
def class_text_to_int(row_label): #對應的标簽傳回一個整數,後面會有檔案用到
if row_label == 'ym':
return 1
elif row_label == 'hg':
return 2
else:
None
"""
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

os.chdir('G:\\object_detection\\models-master\\models-master\\research\\object_detection\\')

flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS

# TO-DO replace this with label map
def class_text_to_int(row_label):
    if row_label == 'ym':
        return 1
    elif row_label == 'hg':
        return 2
    else:
        return None

def split(df, group):
    # group the csv rows by filename, so each image yields exactly one tf.Example
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]

def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        # box coordinates are stored normalized to [0, 1]
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(os.getcwd(), 'images/train')  # modified 2018-04-18
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())
    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))

if __name__ == '__main__':
    tf.app.run()
To convert train.csv, run: python generate_TFR.py --csv_input=data/train.csv --output_path=data/train.record
The same applies to test.csv. This produces train.record and test.record, which are the input files TensorFlow will consume.
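As a quick sanity check (a minimal sketch, not part of the original workflow), you can count the serialized examples in a generated record with TF 1.x's record iterator; the count should match the number of annotated images:

import tensorflow as tf

# iterate over the serialized tf.Example protos in the record and count them
num_examples = sum(1 for _ in tf.python_io.tf_record_iterator('data/train.record'))
print('train.record contains {} examples'.format(num_examples))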
3. Config File and Model
The model used is ssd_mobilenet_v1_coco.config; the file can be found under object_detection\samples\configs.
Create a folder named training under the object_detection directory and copy ssd_mobilenet_v1_coco.config into it.
Open it with a text editor and make the following modifications:
# SSD with Mobilenet v1 configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
'''
Five places need to be modified:

1. train_input_reader: {
     tf_record_input_reader {
       input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
     }
     label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
   }
   input_path is the path to the training data; change it to the corresponding path, here input_path: "data/train.record".
   label_map_path is the path to the label map, here label_map_path: "data/ym_hg.pbtxt".

2. eval_input_reader: {
     tf_record_input_reader {
       input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
     }
     label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
     shuffle: false
     num_readers: 1
   }
   input_path is the path to the evaluation data, here input_path: "data/test.record".
   label_map_path is the path to the label map, here label_map_path: "data/ym_hg.pbtxt".

3. ssd {
     num_classes: 90
     box_coder {
       faster_rcnn_box_coder {
         y_scale: 10.0
         x_scale: 10.0
         height_scale: 5.0
         width_scale: 5.0
       }
     }
   num_classes is the number of label classes; here there are only two ('ym' and 'hg'), so num_classes: 2.

4. train_config: {
     batch_size: 24
     optimizer {
       rms_prop_optimizer: {
         learning_rate: {
           exponential_decay_learning_rate {
             initial_learning_rate: 0.004
             decay_steps: 800720
             decay_factor: 0.95
           }
         }
         momentum_optimizer_value: 0.9
         decay: 0.9
         epsilon: 1.0
       }
     }
   batch_size is the number of samples per training step; I set it to 1 here.

5. fine_tune_checkpoint: "ssd_mobilenet_v1_coco_11_06_2017/model.ckpt"
   from_detection_checkpoint: true
   Comment out or delete these two lines; otherwise training runs very slowly.
'''
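Put together, the relevant sections of the edited ssd_mobilenet_v1_coco.config end up looking like this (only the changed fields are shown; everything else stays at its defaults):

model {
  ssd {
    num_classes: 2
    ...
  }
}
train_config: {
  batch_size: 1
  ...
}
train_input_reader: {
  tf_record_input_reader {
    input_path: "data/train.record"
  }
  label_map_path: "data/ym_hg.pbtxt"
}
eval_input_reader: {
  tf_record_input_reader {
    input_path: "data/test.record"
  }
  label_map_path: "data/ym_hg.pbtxt"
  shuffle: false
  num_readers: 1
}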
The label map ym_hg.pbtxt must be created yourself; its content is as follows:
item {
  name: "ym"
  id: 1
  display_name: "ym"
}
item {
  name: "hg"
  id: 2
  display_name: "hg"
}
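To make sure the label map parses and its ids line up with class_text_to_int, here is a quick check sketch (assuming the research/ directory is on your PYTHONPATH):

from object_detection.utils import label_map_util

label_map = label_map_util.load_labelmap('data/ym_hg.pbtxt')
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=2, use_display_name=True)
print(categories)  # expect entries for 'ym' (id 1) and 'hg' (id 2)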
That completes the configuration!
4. Start Training
Open a command prompt, switch to the object_detection directory, and run:
python model_main.py --pipeline_config_path=training/ssd_mobilenet_v1_coco.config --model_dir=training \
--num_train_steps=200000 --num_eval_steps=20 --alsologtostderr
In the same directory, run tensorboard --logdir=training. It prints an address; open it in a browser to visualize the training progress.
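The test script below expects a frozen model in a folder named inference_graph, while training only produces checkpoints in training/. The repository's export_inference_graph.py tool (also mentioned in the script's comments) freezes a checkpoint into that form; a sketch of the invocation, where XXXX stands for the step number of your newest checkpoint in training/:

python export_inference_graph.py --input_type image_tensor --pipeline_config_path training/ssd_mobilenet_v1_coco.config --trained_checkpoint_prefix training/model.ckpt-XXXX --output_directory inference_graph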
5. Model Testing
The following script is adapted from the API's object detection tutorial notebook:
from distutils.version import StrictVersion
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
import matplotlib.pyplot as plt
from PIL import Image
import cv2

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
#     raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')

# ## Env setup
# This is needed to display the images.
# get_ipython().magic(u'matplotlib inline')

# ## Object detection imports
# Here are the imports from the object detection module.
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

os.chdir('G:\\object_detection\\models-master\\models-master\\research\\object_detection')
# # Model preparation
# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

# What model to use (the folder produced by export_inference_graph.py).
MODEL_NAME = 'inference_graph'
#MODEL_FILE = MODEL_NAME + '.tar.gz'
#DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'ym_hg.pbtxt')
NUM_CLASSES = 2

# ## Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# ## Helper code
def load_image_into_numpy_array(image):
    # convert a PIL image into an (H, W, 3) uint8 numpy array
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# # Detection
# All images in the test_images folder are used.
# If you want to test the code with specific images, just add their paths to TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
#TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
TEST_IMAGE_PATHS = os.listdir('G:\\object_detection\\models-master\\models-master\\research\\object_detection\\test_images')
os.chdir('G:\\object_detection\\models-master\\models-master\\research\\object_detection\\test_images')

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
def run_inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: np.expand_dims(image, 0)})

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
for image_path in TEST_IMAGE_PATHS:
    image = Image.open(image_path)
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    # Actual detection (the batch dimension is added inside run_inference_for_single_image).
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # Visualization of the results of a detection (draws boxes and labels onto image_np in place).
    image_np = vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    #plt.figure(figsize=IMAGE_SIZE)
    #plt.imshow(image_np)
    #plt.show()
    # OpenCV expects BGR, so reverse the channel order before displaying.
    image_bgr = image_np[..., ::-1]
    cv2.imshow('Object detector', image_bgr)
    # Press any key to move to the next image
    cv2.waitKey(0)

# Clean up
cv2.destroyAllWindows()
6. Detection Results