本篇部落格要講解如何調用百度API接口,并做一個簡單的界面來實作圖像的文字識别。
測試環境如下:
1.作業系統:windows10 X64 企業版
2.python環境:3.6.8 64位純淨版
3.已經注冊好的百度API功能區域賬号
4.python環境要安裝的API接口調用第三方庫:baidu-aip==2.2.18.0
5.PyQt5==5.12.3 pyqt5-tools == 5.13.0.1.5
main.py代碼:
# -*- coding:utf-8 -*-
import sys
from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog
from PyQt5 import QtGui
from mainGui import *
from aip import AipOcr
import os

# Baidu OCR credentials. Each value can be overridden through an environment
# variable so that real keys do not have to live in the source file; the
# literals below are only the fallback that keeps the original behaviour.
# NOTE(review): these fallback keys are published in plain text - rotate them
# and drop the defaults before using this anywhere that matters.
APP_ID = os.environ.get('BAIDU_OCR_APP_ID', '23130726')
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', '9OO7jicB2LR754g3awfLv860')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY', 'IBUTi5wPBmOqWyBYWwL4DA10kR1CTSNk')

# Shared OCR client used by MyMainWindow.img_to_str.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Path of the image most recently chosen in the file dialog (None until the
# user has picked one). Written by MyMainWindow.openfile.
fileName_choose = None
class MyMainWindow(QMainWindow, Ui_Form):
    """Main window of the OCR demo.

    Connects the two buttons created by ``Ui_Form`` to an image picker and to
    a Baidu OCR request, and shows the recognised text in ``textEdit``.
    """

    def __init__(self, parent=None):
        """Build the generated UI and connect the button signals."""
        super(MyMainWindow, self).__init__(parent)
        self.setupUi(self)
        self.setWindowTitle('OCR文字識别系統')
        self.pushButton_2.clicked.connect(self.openfile)
        self.pushButton.clicked.connect(self.char_recognition)

    def char_recognition(self):
        """Run OCR on the currently selected image and display the result.

        Shows a hint in ``textEdit`` when no image has been selected, and an
        error message when the image cannot be read or the result file cannot
        be written.

        Returns:
            Always ``0``.
        """
        # Falsy covers both the initial None and an empty path.
        if not fileName_choose:
            self.textEdit.setText('請先選擇圖檔!')
            return 0
        try:
            res = self.img_to_str(fileName_choose)
        except OSError as exc:
            # The image may have been moved/deleted after selection, or
            # result.txt may not be writable; report instead of crashing
            # inside the Qt slot.
            res = '檔案讀寫失敗: {}'.format(exc)
        self.textEdit.setText(str(res))
        return 0

    def openfile(self):
        """Let the user choose an image and preview it in ``label``.

        The module-level ``fileName_choose`` is updated only when a file was
        actually chosen, so cancelling the dialog keeps the previous selection.
        """
        global fileName_choose
        # getOpenFileName returns (path, selected_filter); the path is an empty
        # string when the dialog is cancelled. Patterns inside one name filter
        # are separated by spaces, not commas.
        chosen, _ = QFileDialog.getOpenFileName(
            self,
            "選取圖檔",
            ' ',
            "All Files (*);;Image Files (*.png *.jpg *.jpeg)")
        if not chosen:
            return
        fileName_choose = chosen
        print(fileName_choose)
        pixmap = QtGui.QPixmap(fileName_choose)
        # "All Files" allows picking something Qt cannot decode; only scale a
        # pixmap that actually loaded.
        if not pixmap.isNull():
            pixmap = pixmap.scaled(self.label.width(), self.label.height())
        self.label.setPixmap(pixmap)

    def get_file_content(self, filePath):
        """Return the raw bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def img_to_str(self, image_path):
        """Send the image to Baidu general OCR and return the recognised text.

        On success the text (one recognised line per row) is also saved to
        ``result.txt`` in the current working directory.

        Args:
            image_path: Path of the image file to recognise.

        Returns:
            The recognised text, or an error description when the response
            contains no ``words_result``.

        Raises:
            OSError: If the image cannot be read or ``result.txt`` cannot be
                written.
        """
        # Optional request parameters.
        options = {
            "language_type": "CHN_ENG",    # mixed Chinese / English
            "detect_direction": "true",    # detect text orientation
            "detect_language": "true",     # detect language
            "probability": "false",        # no per-line confidence in the result
        }
        result = client.basicGeneral(self.get_file_content(image_path), options)
        # A response without 'words_result' is a failure (presumably carrying
        # 'error_code' / 'error_msg' - confirm against the SDK docs). Previously
        # this path crashed with UnboundLocalError on `text`.
        if 'words_result' not in result:
            return '識别失敗: {}'.format(result.get('error_msg', result))
        text = '\n'.join(w['words'] for w in result['words_result'])
        # Explicit UTF-8 so the write does not depend on the locale's default
        # encoding, which may be unable to encode some recognised characters.
        with open("result.txt", 'w', encoding='utf-8') as fs:
            fs.write(text)
        return text
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the main window and
    # exit the process with the event loop's return code.
    app = QApplication(sys.argv)
    myWin = MyMainWindow()
    myWin.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
mainGui.py代碼:
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'OCR識别界面.ui'
#
# Created by: PyQt5 UI code generator 5.12.3
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_Form(object):
    """Generated UI definition for the OCR form.

    ``setupUi`` creates the widgets as attributes of this object: two push
    buttons (``pushButton_2``, ``pushButton``), a caption label (``label_2``),
    a text edit (``textEdit``) and a large label (``label``).
    """

    def setupUi(self, Form):
        """Create and lay out all child widgets on ``Form``.

        Args:
            Form: The widget that becomes the parent of the created widgets.
        """
        Form.setObjectName("Form")
        Form.resize(560, 330)
        Form.setMaximumSize(QtCore.QSize(580, 350))
        # Container starting at x=130 that holds the large label.
        self.widget_2 = QtWidgets.QWidget(Form)
        self.widget_2.setGeometry(QtCore.QRect(130, 10, 421, 311))
        self.widget_2.setStyleSheet("color: rgb(170, 255, 255);")
        self.widget_2.setObjectName("widget_2")
        # Large label with a coloured background and no text.
        self.label = QtWidgets.QLabel(self.widget_2)
        self.label.setGeometry(QtCore.QRect(0, 0, 411, 311))
        self.label.setStyleSheet("background-color: rgb(170, 255, 255);")
        self.label.setText("")
        self.label.setObjectName("label")
        # Container at x=0 that holds the buttons, caption and text edit.
        self.widget = QtWidgets.QWidget(Form)
        self.widget.setGeometry(QtCore.QRect(0, 10, 131, 311))
        self.widget.setObjectName("widget")
        # Upper part of the left container: a vertical layout with the two
        # buttons and the caption label, separated by spacers.
        self.verticalLayoutWidget = QtWidgets.QWidget(self.widget)
        self.verticalLayoutWidget.setGeometry(QtCore.QRect(0, 0, 122, 171))
        self.verticalLayoutWidget.setObjectName("verticalLayoutWidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget)
        self.verticalLayout.setContentsMargins(0, 0, 0, 0)
        self.verticalLayout.setObjectName("verticalLayout")
        # First button (its text is set in retranslateUi), 16pt italic.
        self.pushButton_2 = QtWidgets.QPushButton(self.verticalLayoutWidget)
        font = QtGui.QFont()
        font.setPointSize(16)
        font.setItalic(True)
        self.pushButton_2.setFont(font)
        self.pushButton_2.setObjectName("pushButton_2")
        self.verticalLayout.addWidget(self.pushButton_2)
        # Second button, same font settings.
        self.pushButton = QtWidgets.QPushButton(self.verticalLayoutWidget)
        font = QtGui.QFont()
        font.setPointSize(16)
        font.setItalic(True)
        self.pushButton.setFont(font)
        self.pushButton.setObjectName("pushButton")
        self.verticalLayout.addWidget(self.pushButton)
        spacerItem = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Expanding)
        self.verticalLayout.addItem(spacerItem)
        # Caption label, 18pt bold.
        self.label_2 = QtWidgets.QLabel(self.verticalLayoutWidget)
        font = QtGui.QFont()
        font.setPointSize(18)
        font.setBold(True)
        font.setWeight(75)
        self.label_2.setFont(font)
        self.label_2.setObjectName("label_2")
        self.verticalLayout.addWidget(self.label_2)
        spacerItem1 = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Expanding)
        self.verticalLayout.addItem(spacerItem1)
        # Lower part of the left container: a fixed 120x150 frame that holds
        # the text edit.
        self.frame = QtWidgets.QFrame(self.widget)
        self.frame.setGeometry(QtCore.QRect(0, 170, 120, 150))
        self.frame.setMinimumSize(QtCore.QSize(120, 150))
        self.frame.setMaximumSize(QtCore.QSize(120, 150))
        self.frame.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.frame.setFrameShadow(QtWidgets.QFrame.Raised)
        self.frame.setObjectName("frame")
        self.verticalLayout_3 = QtWidgets.QVBoxLayout(self.frame)
        self.verticalLayout_3.setObjectName("verticalLayout_3")
        self.textEdit = QtWidgets.QTextEdit(self.frame)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        # Carry over the text edit's existing height-for-width flag.
        sizePolicy.setHeightForWidth(self.textEdit.sizePolicy().hasHeightForWidth())
        self.textEdit.setSizePolicy(sizePolicy)
        self.textEdit.setMinimumSize(QtCore.QSize(110, 0))
        self.textEdit.setMaximumSize(QtCore.QSize(135, 130))
        self.textEdit.setObjectName("textEdit")
        self.verticalLayout_3.addWidget(self.textEdit)
        # Apply the display strings, then connect slots by object name.
        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title and the visible text of the buttons and caption.

        Args:
            Form: The widget whose window title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Form"))
        self.pushButton_2.setText(_translate("Form", "載入圖檔"))
        self.pushButton.setText(_translate("Form", "文字識别"))
        self.label_2.setText(_translate("Form", " 識别結果:"))
測試圖檔:

letter.png
運作效果:
result.png
程式整理及可執行程式下載:https://download.csdn.net/download/mzl_18353516147/14028406
由于不太擅長設計UI,所以界面不太好看。發給我同學之後,我同學參考我的設計思路修改了一下:
app.py代碼:
import sys
import time
import random
from aip import AipOcr
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from PyQt5 import QtGui
from PyQt5.QtSql import QSqlQuery,QSqlDatabase
from Ui_ocrui import Ui_MainWindow
# from configDialog import Ui_Dialog #暫時未使用
import os

# Baidu OCR credentials. Each value can be overridden through an environment
# variable so that real keys do not have to live in the source file; the
# literals below are only the fallback that keeps the original behaviour.
# NOTE(review): these fallback keys are published in plain text - rotate them
# and drop the defaults before using this anywhere that matters.
APP_ID = os.environ.get('BAIDU_OCR_APP_ID', '19105187')
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', 'IywiicFEKa5ny2ckUp29tHVk')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY', 'uWnu44pu6lFjPZylFLMBymurGNTtrD2t')

# Shared OCR client used by MainWindow.recognize.
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Request options passed with every OCR call.
options = {
    'detect_direction': 'true',
    'language_type': 'CHN_ENG',
}
class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window of the reworked OCR demo.

    Provides the slots that ``Ui_MainWindow.setupUi`` connects to its buttons:
    ``loadImage``, ``recognize``, ``cleanText`` and ``configApi``.
    """

    def __init__(self, parent=None):
        """Build the generated UI and initialise the instance state."""
        super(MainWindow, self).__init__(parent=parent)
        self.setupUi(self)
        self.text = ""
        self.strTime = ""
        self.basicid = ""
        self.filePath = ""   # path of the selected image; "" means none

    def loadImage(self):
        """Let the user choose an image and preview it in ``plabel``.

        Cancelling the dialog leaves the current selection and preview
        untouched.
        """
        chosen, _ = QFileDialog.getOpenFileName(self,'打開檔案','.','圖像檔案(*.png *.jpg *.jpeg)')
        # An empty path means the dialog was cancelled; do not wipe the
        # previous selection or try to scale a null pixmap.
        if not chosen:
            return
        self.filePath = chosen
        pixmap = QtGui.QPixmap(self.filePath)
        if not pixmap.isNull():
            pixmap = pixmap.scaled(self.plabel.width(), self.plabel.height())
        self.jpg = pixmap
        self.plabel.setPixmap(self.jpg)

    def recognize(self):
        """Run OCR on the selected image and show the text in ``tedit``.

        A warning box is shown when no image is selected, when the image
        cannot be read, or when the response contains no ``words_result``.
        """
        if not self.filePath:
            QMessageBox.warning(self, "錯誤", "沒有找到圖檔", QMessageBox.Yes, QMessageBox.Yes)
            return
        try:
            image = self.get_file_content(self.filePath)
        except OSError as exc:
            # The file may have been moved or deleted after it was selected.
            QMessageBox.warning(self, "錯誤", str(exc), QMessageBox.Yes, QMessageBox.Yes)
            return
        result = aipOcr.basicAccurate(image, options)
        # A response without 'words_result' is a failure (presumably carrying
        # 'error_code' / 'error_msg' - confirm against the SDK docs). Indexing
        # it directly used to raise KeyError inside the Qt slot.
        if 'words_result' not in result:
            QMessageBox.warning(self, "錯誤", str(result.get('error_msg', result)),
                                QMessageBox.Yes, QMessageBox.Yes)
            return
        # One recognised line per row, each terminated by a newline. Built in
        # a local so a failure cannot leave partial text in self.text.
        recognised = ''.join(item['words'] + '\n' for item in result['words_result'])
        self.tedit.setPlainText(recognised)
        self.tedit.repaint()  # works around text not being displayed on macOS
        self.text = ''

    def get_file_content(self,filePath):
        """Return the raw bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def cleanText(self):
        """Repaint ``tedit`` after the clear button has been pressed.

        NOTE(review): ``self.filePath`` is not reset here, so ``recognize``
        still uses the previously selected file - confirm this is intended.
        """
        self.tedit.repaint()  # works around text not being displayed on macOS

    def configApi(self):
        """Placeholder slot for the API configuration button; does nothing."""
        pass
if __name__ == '__main__':
    # Script entry point: create the Qt application, show the main window and
    # exit the process with the event loop's return code.
    app = QApplication(sys.argv)
    mainWindow = MainWindow()
    mainWindow.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
Ui_ocrui.py代碼:
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file '/Users/EIthschnapps/Documents/git/playground/ocr_pyqt5/ocrui.ui'
#
# Created by: PyQt5 UI code generator 5.9.2
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
    """Generated UI definition for the "My OCR" main window.

    ``setupUi`` creates four push buttons (``opt``, ``start``, ``clean``,
    ``config``), a 100x100 label (``plabel``) and a text edit (``tedit``),
    plus a menu bar and a status bar.
    """

    def setupUi(self, MainWindow):
        """Create the widgets, lay them out and connect the button signals.

        Args:
            MainWindow: The window to populate. It must provide the
                ``loadImage``, ``recognize``, ``configApi`` and ``cleanText``
                attributes that the buttons are connected to below.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        # Container for the horizontal layout: button column first, then the
        # text edit.
        self.layoutWidget = QtWidgets.QWidget(self.centralwidget)
        self.layoutWidget.setGeometry(QtCore.QRect(10, 10, 791, 541))
        self.layoutWidget.setObjectName("layoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        # Vertical column: opt, start, plabel, clean, config (in that order).
        self.verticalLayout = QtWidgets.QVBoxLayout()
        self.verticalLayout.setObjectName("verticalLayout")
        self.opt = QtWidgets.QPushButton(self.layoutWidget)
        self.opt.setObjectName("opt")
        self.verticalLayout.addWidget(self.opt)
        self.start = QtWidgets.QPushButton(self.layoutWidget)
        self.start.setObjectName("start")
        self.verticalLayout.addWidget(self.start)
        # Label fixed at 100x100 with no text.
        self.plabel = QtWidgets.QLabel(self.layoutWidget)
        self.plabel.setEnabled(True)
        self.plabel.setMinimumSize(QtCore.QSize(100, 100))
        self.plabel.setMaximumSize(QtCore.QSize(100, 100))
        self.plabel.setText("")
        self.plabel.setObjectName("plabel")
        self.verticalLayout.addWidget(self.plabel)
        self.clean = QtWidgets.QPushButton(self.layoutWidget)
        self.clean.setObjectName("clean")
        self.verticalLayout.addWidget(self.clean)
        self.config = QtWidgets.QPushButton(self.layoutWidget)
        self.config.setObjectName("config")
        self.verticalLayout.addWidget(self.config)
        self.horizontalLayout.addLayout(self.verticalLayout)
        # Text edit placed after the button column in the horizontal layout.
        self.tedit = QtWidgets.QTextEdit(self.layoutWidget)
        self.tedit.setObjectName("tedit")
        self.horizontalLayout.addWidget(self.tedit)
        MainWindow.setCentralWidget(self.centralwidget)
        # Menu bar with a single menu, and a status bar.
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 22))
        self.menubar.setObjectName("menubar")
        self.menufile = QtWidgets.QMenu(self.menubar)
        self.menufile.setObjectName("menufile")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.menubar.addAction(self.menufile.menuAction())
        self.retranslateUi(MainWindow)
        # Button wiring. The clean button is connected three times: it clears
        # the text edit, clears the label, and calls MainWindow.cleanText.
        self.opt.clicked.connect(MainWindow.loadImage)
        self.start.clicked.connect(MainWindow.recognize)
        self.config.clicked.connect(MainWindow.configApi)
        self.clean.clicked.connect(self.tedit.clear)
        self.clean.clicked.connect(self.plabel.clear)
        self.clean.clicked.connect(MainWindow.cleanText)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title, button captions and menu title.

        Args:
            MainWindow: The window whose title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "My OCR"))
        self.opt.setText(_translate("MainWindow", "選擇圖檔"))
        self.start.setText(_translate("MainWindow", "開始識别"))
        self.clean.setText(_translate("MainWindow", "清空"))
        self.config.setText(_translate("MainWindow", "配置API Key"))
        self.menufile.setTitle(_translate("MainWindow", "file"))
運作界面:
exe程式及項目下載:https://download.csdn.net/download/mzl_18353516147/14028396