天天看點

Python 數字識別：使用 Tesseract OCR 與 KNN（OpenCV）進行數字識別

"""Read a numeric meter value from an image with a tiny KNN digit classifier.

Pipeline: threshold the grayscale image, find external contours, take each
contour's bounding box as a symbol, and classify each cropped symbol against
50x50 digit templates stored in the ``templates/`` directory (one ``<label>.png``
file per digit).

NOTE(review): this code targets the legacy OpenCV 2.4 API (``cv2.KNearest``,
``find_nearest``, 2-tuple ``cv2.findContours``). On OpenCV 3+/4+ the KNN model
lives under ``cv2.ml`` and ``findContours`` has a different return signature —
confirm the installed OpenCV version before running.
"""
import os

import cv2
import numpy

# Templates and samples are normalized to this square size (50 x 50 px).
KNN_SQUARE_SIDE = 50


def resize(cv_image, factor):
    """Return *cv_image* scaled by *factor* (expects a 2-D grayscale image)."""
    # shape is (rows, cols); cv2.resize wants (width, height), hence [::-1].
    new_size = tuple(map(lambda x: x * factor, cv_image.shape[::-1]))
    return cv2.resize(cv_image, new_size)


def crop(cv_image, box):
    """Return the sub-image of *cv_image* inside *box* = (x0, y0, x1, y1)."""
    x0, y0, x1, y1 = box
    return cv_image[y0:y1, x0:x1]


def draw_box(cv_image, box):
    """Draw *box* = (x0, y0, x1, y1) on *cv_image* in red, 2 px thick (in place)."""
    x0, y0, x1, y1 = box
    cv2.rectangle(cv_image, (x0, y0), (x1, y1), (0, 0, 255), 2)


def draw_boxes_and_show(cv_image, boxes, title='N'):
    """Show a copy of grayscale *cv_image* with all *boxes* drawn; wait for a key."""
    temp_image = cv2.cvtColor(cv_image, cv2.COLOR_GRAY2RGB)
    for box in boxes:
        draw_box(temp_image, box)
    cv2.imshow(title, temp_image)
    cv2.waitKey(0)


class BaseKnnMatcher(object):
    """KNN (k=1) template matcher trained from one image file per class label."""

    # Maximum accepted nearest-neighbour distance; 0 here means the base class
    # rejects everything — subclasses override with a usable threshold.
    distance_threshold = 0

    def __init__(self, source_dir):
        """Train the model from the template images found in *source_dir*."""
        self.model, self.label_map = self.get_model_and_label_map(source_dir)

    @staticmethod
    def get_model_and_label_map(source_dir):
        """Build and train a ``cv2.KNearest`` model from ``<label>.png`` files.

        Returns ``(model, label_map)`` where ``label_map[i]`` is the label of
        training class index ``i``.
        """
        responses = []
        label_map = []
        samples = numpy.empty((0, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE), numpy.float32)
        for label_idx, filename in enumerate(os.listdir(source_dir)):
            # The label is the filename without its '.png' suffix.
            label = filename[:filename.index('.png')]
            label_map.append(label)
            responses.append(label_idx)

            # Load as grayscale, normalize to the standard square, flatten to
            # one row per sample.
            image = cv2.imread(os.path.join(source_dir, filename), 0)
            suit_image_standard_size = cv2.resize(
                image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
            sample = suit_image_standard_size.reshape(
                (1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE))
            samples = numpy.append(samples, sample, 0)

        responses = numpy.array(responses, numpy.float32)
        responses = responses.reshape((responses.size, 1))

        model = cv2.KNearest()
        model.train(samples, responses)
        return model, label_map

    def predict(self, image):
        """Classify grayscale *image*; return its label, or ``None`` if the
        nearest neighbour is farther than ``distance_threshold``."""
        image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
        image_standard_size = numpy.float32(
            image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE)))
        closest_class, results, neigh_resp, distance = self.model.find_nearest(
            image_standard_size, k=1)
        if distance[0][0] > self.distance_threshold:
            return None
        return self.label_map[int(closest_class)]


class DigitKnnMatcher(BaseKnnMatcher):
    """Digit matcher: an effectively unlimited distance threshold, so the
    nearest template always wins."""
    distance_threshold = 10 ** 10


class MeterValueReader(object):
    """Detect digit boxes in a meter image and read them left to right."""

    def __init__(self):
        # Digit templates are expected in the 'templates' directory.
        self.digit_knn_matcher = DigitKnnMatcher(source_dir='templates')

    @classmethod
    def get_symbol_boxes(cls, cv_image):
        """Return bounding boxes (x0, y0, x1, y1) of external contours found
        in a binary-thresholded copy of grayscale *cv_image*."""
        ret, thresh = cv2.threshold(cv_image.copy(), 150, 255, cv2.THRESH_BINARY)
        contours, hierarchy = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        symbol_boxes = []
        for contour in contours:
            x, y, width, height = cv2.boundingRect(contour)
            # You can test here for box size, though not required in your example:
            # if cls.is_size_of_digit(width, height):
            #     symbol_boxes.append((x, y, x + width, y + height))
            symbol_boxes.append((x, y, x + width, y + height))
        return symbol_boxes

    def get_value(self, meter_cv2_image):
        """Return the list of predicted symbols, ordered left to right."""
        symbol_boxes = self.get_symbol_boxes(meter_cv2_image)
        symbol_boxes.sort()  # x is first in tuple, so this sorts left-to-right.
        symbols = []
        for box in symbol_boxes:
            symbol = self.digit_knn_matcher.predict(crop(meter_cv2_image, box))
            symbols.append(symbol)
        return symbols


if __name__ == '__main__':
    # If you want to see how box detection works, uncomment these:
    # img_bw = cv2.imread(os.path.join('original.png'), 0)
    # boxes = MeterValueReader.get_symbol_boxes(img_bw)
    # draw_boxes_and_show(img_bw, boxes)

    # Uncomment to generate templates from an image:
    # import random
    # TEMPLATE_DIR = 'templates'
    # img_bw = cv2.imread(os.path.join('original.png'), 0)
    # boxes = MeterValueReader.get_symbol_boxes(img_bw)
    # for box in boxes:
    #     # You need to label templates manually after extraction.
    #     cv2.imwrite(os.path.join(TEMPLATE_DIR, '%s.png' % random.randint(0, 1000)),
    #                 crop(img_bw, box))

    img_bw = cv2.imread(os.path.join('original.png'), 0)
    vr = MeterValueReader()
    print(vr.get_value(img_bw))