天天看點

百度API接口研究02-UI界面圖像OCR文字識别

本篇部落格主要講解如何調用百度API接口,并做一個簡單的界面來實作圖像的文字識别。

測試環境如下:

1.作業系統:windows10  X64 企業版

2.python環境:3.6.8   64位純淨版

3.已經注冊好的百度API功能區域賬号

4.python環境要安裝的API接口調用第三方庫:baidu-aip==2.2.18.0

5.PyQt5==5.12.3   pyqt5-tools == 5.13.0.1.5

main.py代碼:

# -*- coding:utf-8 -*-
import sys 	
from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog
from PyQt5 import QtGui
from mainGui import *
from aip import AipOcr

# Credentials handed to the AipOcr constructor below.
# NOTE(review): these are hard-coded in the source; consider loading them from
# the environment or a config file instead of committing them.
APP_ID = '23130726'
API_KEY = '9OO7jicB2LR754g3awfLv860'
SECRET_KEY = 'IBUTi5wPBmOqWyBYWwL4DA10kR1CTSNk'
# Shared OCR client; MyMainWindow.img_to_str calls client.basicGeneral on it.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


# Path of the image chosen in the file dialog. Starts as None, is assigned by
# MyMainWindow.openfile, and is read by MyMainWindow.char_recognition.
fileName_choose = None
class MyMainWindow(QMainWindow, Ui_Form):
    """Main window: choose an image, preview it, and OCR it with the Baidu client.

    The path of the selected image is stored in the module-level
    ``fileName_choose`` variable (``None`` until a file has been chosen).
    """

    def __init__(self, parent=None):
        """Build the generated UI and connect the two buttons to their handlers."""
        super(MyMainWindow, self).__init__(parent)
        self.setupUi(self)
        self.setWindowTitle('OCR文字識别系統')
        self.pushButton_2.clicked.connect(self.openfile)
        self.pushButton.clicked.connect(self.char_recognition)

    def char_recognition(self):
        """Run OCR on the selected image and show the result in ``textEdit``.

        When no image has been selected, a prompt is shown instead.

        Returns:
            Always 0.
        """
        # Truthiness covers both the initial None and an empty path.
        if fileName_choose:
            res = self.img_to_str(fileName_choose)
            self.textEdit.setText(str(res))
        else:
            self.textEdit.setText('請先選擇圖檔!')
        return 0

    def openfile(self):
        """Ask the user for an image file and preview it in ``label``.

        Cancelling the dialog leaves the previous selection untouched.
        """
        global fileName_choose
        # Qt name filters separate patterns with spaces, not commas.
        selected, _ = QFileDialog.getOpenFileName(
            self,
            "選取圖檔",
            ' ',
            "All Files (*);;Image Files (*.png *.jpg *.jpeg)")

        # getOpenFileName returns an empty path when the dialog is cancelled.
        if not selected:
            return

        fileName_choose = selected
        print(fileName_choose)
        # Stretch the picture to the label's current size for the preview.
        jpg = QtGui.QPixmap(fileName_choose).scaled(self.label.width(), self.label.height())
        self.label.setPixmap(jpg)

    def get_file_content(self, filePath):
        """Return the raw bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def img_to_str(self, image_path):
        """OCR the image at ``image_path`` and return the recognised text.

        On success the text (one recognised line per row) is also written to
        ``result.txt`` in the current working directory. When the response has
        no ``words_result`` entry, a failure message is returned and nothing
        is written.

        Args:
            image_path: Path of the image file to recognise.

        Returns:
            The recognised text, or a failure message.
        """
        # Optional request parameters.
        options = {
            "language_type": "CHN_ENG",   # mixed Chinese and English
            "detect_direction": "true",   # detect the text orientation
            "detect_language": "true",    # detect the language
            "probability": "false",       # no per-line confidence in the result
        }

        # Call the general text-recognition endpoint with the options above.
        result = client.basicGeneral(self.get_file_content(image_path), options)

        # Without 'words_result' there is nothing to join.
        # NOTE(review): error responses presumably carry 'error_msg' - confirm
        # against the Baidu OCR documentation.
        if 'words_result' not in result:
            return '識别失敗: {}'.format(result.get('error_msg', result))

        # Keep only the recognised text of each line.
        text = '\n'.join(w['words'] for w in result['words_result'])

        # Explicit UTF-8 so the write does not depend on the platform codec.
        with open("result.txt", 'w', encoding='utf-8') as fs:
            fs.write(text)
        return text

if __name__ == "__main__":
    # Create the application, show the main window, then run the Qt event
    # loop and exit the process with its return code.
    app = QApplication(sys.argv)
    myWin = MyMainWindow()
    myWin.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
           

mainGui.py代碼:

# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'OCR識别界面.ui'
#
# Created by: PyQt5 UI code generator 5.12.3
#
# WARNING! All changes made in this file will be lost!


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_Form(object):
    """UI definition for the OCR form, as emitted by the PyQt5 UI code generator.

    ``setupUi`` creates the widgets as attributes of this object and
    ``retranslateUi`` assigns their visible texts.
    """

    def setupUi(self, Form):
        """Create all child widgets on ``Form`` and lay them out.

        Args:
            Form: The widget that becomes the parent of everything built here.
        """
        Form.setObjectName("Form")
        Form.resize(560, 330)
        Form.setMaximumSize(QtCore.QSize(580, 350))
        # Right-hand area: a container holding the (initially empty) label.
        self.widget_2 = QtWidgets.QWidget(Form)
        self.widget_2.setGeometry(QtCore.QRect(130, 10, 421, 311))
        self.widget_2.setStyleSheet("color: rgb(170, 255, 255);")
        self.widget_2.setObjectName("widget_2")
        self.label = QtWidgets.QLabel(self.widget_2)
        self.label.setGeometry(QtCore.QRect(0, 0, 411, 311))
        self.label.setStyleSheet("background-color: rgb(170, 255, 255);")
        self.label.setText("")
        self.label.setObjectName("label")
        # Left-hand column: container for the buttons, caption and text box.
        self.widget = QtWidgets.QWidget(Form)
        self.widget.setGeometry(QtCore.QRect(0, 10, 131, 311))
        self.widget.setObjectName("widget")
        self.verticalLayoutWidget = QtWidgets.QWidget(self.widget)
        self.verticalLayoutWidget.setGeometry(QtCore.QRect(0, 0, 122, 171))
        self.verticalLayoutWidget.setObjectName("verticalLayoutWidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget)
        self.verticalLayout.setContentsMargins(0, 0, 0, 0)
        self.verticalLayout.setObjectName("verticalLayout")
        # First button in the column (16pt italic font).
        self.pushButton_2 = QtWidgets.QPushButton(self.verticalLayoutWidget)
        font = QtGui.QFont()
        font.setPointSize(16)
        font.setItalic(True)
        self.pushButton_2.setFont(font)
        self.pushButton_2.setObjectName("pushButton_2")
        self.verticalLayout.addWidget(self.pushButton_2)
        # Second button, same font settings.
        self.pushButton = QtWidgets.QPushButton(self.verticalLayoutWidget)
        font = QtGui.QFont()
        font.setPointSize(16)
        font.setItalic(True)
        self.pushButton.setFont(font)
        self.pushButton.setObjectName("pushButton")
        self.verticalLayout.addWidget(self.pushButton)
        # Expanding spacers are placed before and after the caption label.
        spacerItem = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Expanding)
        self.verticalLayout.addItem(spacerItem)
        # Caption label (18pt bold).
        self.label_2 = QtWidgets.QLabel(self.verticalLayoutWidget)
        font = QtGui.QFont()
        font.setPointSize(18)
        font.setBold(True)
        font.setWeight(75)
        self.label_2.setFont(font)
        self.label_2.setObjectName("label_2")
        self.verticalLayout.addWidget(self.label_2)
        spacerItem1 = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Expanding)
        self.verticalLayout.addItem(spacerItem1)
        # Frame pinned to 120x150 (min == max) that holds the text edit.
        self.frame = QtWidgets.QFrame(self.widget)
        self.frame.setGeometry(QtCore.QRect(0, 170, 120, 150))
        self.frame.setMinimumSize(QtCore.QSize(120, 150))
        self.frame.setMaximumSize(QtCore.QSize(120, 150))
        self.frame.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.frame.setFrameShadow(QtWidgets.QFrame.Raised)
        self.frame.setObjectName("frame")
        self.verticalLayout_3 = QtWidgets.QVBoxLayout(self.frame)
        self.verticalLayout_3.setObjectName("verticalLayout_3")
        # Text edit with a Fixed/Fixed size policy and explicit size bounds.
        self.textEdit = QtWidgets.QTextEdit(self.frame)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.textEdit.sizePolicy().hasHeightForWidth())
        self.textEdit.setSizePolicy(sizePolicy)
        self.textEdit.setMinimumSize(QtCore.QSize(110, 0))
        self.textEdit.setMaximumSize(QtCore.QSize(135, 130))
        self.textEdit.setObjectName("textEdit")
        self.verticalLayout_3.addWidget(self.textEdit)

        # Assign the visible texts, then let Qt auto-connect slots by object name.
        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title and the texts of both buttons and the caption.

        Args:
            Form: The widget whose window title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Form"))
        self.pushButton_2.setText(_translate("Form", "載入圖檔"))
        self.pushButton.setText(_translate("Form", "文字識别"))
        self.label_2.setText(_translate("Form", "   識别結果:"))
           

測試圖檔:

百度API接口研究02-UI界面圖像OCR文字識别

letter.png

運作效果:

百度API接口研究02-UI界面圖像OCR文字識别

result.png

程式整理及可執行程式下載:https://download.csdn.net/download/mzl_18353516147/14028406

由于不太擅長設計UI,所以界面不太好看;發給同學之後,他參考我的設計思路修改了一下:

app.py代碼:

import sys
import time
import random
from aip import AipOcr

from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from PyQt5 import QtGui
from PyQt5.QtSql import QSqlQuery,QSqlDatabase

from Ui_ocrui import Ui_MainWindow
# from configDialog import Ui_Dialog #暫時未使用

# Credentials handed to the AipOcr constructor below.
# NOTE(review): these are hard-coded in the source; consider loading them from
# the environment or a config file instead of committing them.
APP_ID = '19105187'   #APP_ID
API_KEY = 'IywiicFEKa5ny2ckUp29tHVk'  #API_KEY
SECRET_KEY =  'uWnu44pu6lFjPZylFLMBymurGNTtrD2t'  #SECRET_KEY
# Shared OCR client; MainWindow.recognize calls aipOcr.basicAccurate on it.
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)
# Request options passed along with every basicAccurate call.
options = {
            'detect_direction': 'true',
            'language_type': 'CHN_ENG',
        }


class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window: load an image, preview it, and OCR it with the Baidu client.

    The handler methods below are connected to the buttons by the generated
    ``Ui_MainWindow.setupUi``.
    """

    def __init__(self, parent=None):
        """Build the generated UI and initialise the window state."""
        super(MainWindow, self).__init__(parent=parent)
        self.setupUi(self)
        self.text = ""
        self.strTime = ""
        self.basicid = ""
        self.filePath = ""

    def loadImage(self):
        """Ask the user for an image file and preview it in ``plabel``.

        Cancelling the dialog keeps the previously loaded image and path.
        """
        chosen, _ = QFileDialog.getOpenFileName(self,'打開檔案','.','圖像檔案(*.png *.jpg *.jpeg)')
        # getOpenFileName returns an empty path when the dialog is cancelled.
        if not chosen:
            return
        self.filePath = chosen
        # Stretch the picture to the label's current size for the preview.
        self.jpg = QtGui.QPixmap(self.filePath).scaled(self.plabel.width(), self.plabel.height())
        self.plabel.setPixmap(self.jpg)

    def recognize(self):
        """OCR the loaded image and show the recognised lines in ``tedit``.

        A warning box is shown when no image is loaded, or when the response
        has no ``words_result`` entry.
        """
        if not self.filePath:
            QMessageBox.warning(self, "錯誤", "沒有找到圖檔", QMessageBox.Yes, QMessageBox.Yes)
            return

        result = aipOcr.basicAccurate(self.get_file_content(self.filePath), options)
        words_result = result.get('words_result')
        if words_result is None:
            # NOTE(review): error responses presumably carry 'error_msg' -
            # confirm against the Baidu OCR documentation.
            QMessageBox.warning(self, "錯誤", str(result.get('error_msg', result)),
                                QMessageBox.Yes, QMessageBox.Yes)
            return

        # One recognised line per row, each terminated by a newline.
        self.text = ''.join(entry['words'] + '\n' for entry in words_result)

        self.tedit.setPlainText(self.text)
        self.tedit.repaint()    # works around text not showing up on macOS

        self.text = ''

    def get_file_content(self,filePath):
        """Return the raw bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def cleanText(self):
        """Forget the loaded image and repaint the text box.

        The text box and preview label are cleared by separate signal
        connections. The path is reset here so a later recognition cannot run
        on an image that is no longer shown.
        """
        self.filePath = ""
        self.tedit.repaint()    # works around text not showing up on macOS

    def configApi(self):
        """Placeholder for an API-key configuration dialog; does nothing yet."""
        pass


if __name__ == '__main__':
    # Create the application, show the main window, then run the Qt event
    # loop and exit the process with its return code.
    app = QApplication(sys.argv)
    mainWindow = MainWindow()
    mainWindow.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
           

Ui_ocrui.py代碼:

# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file '/Users/EIthschnapps/Documents/git/playground/ocr_pyqt5/ocrui.ui'
#
# Created by: PyQt5 UI code generator 5.9.2
#
# WARNING! All changes made in this file will be lost!

from PyQt5 import QtCore, QtGui, QtWidgets

class Ui_MainWindow(object):
    """UI definition for the OCR main window, as emitted by the PyQt5 UI code generator.

    ``setupUi`` creates the widgets as attributes of this object and connects
    the buttons; ``retranslateUi`` assigns the visible texts.
    """

    def setupUi(self, MainWindow):
        """Create the widgets, menu bar and status bar, and connect the buttons.

        Args:
            MainWindow: The window to populate. It must provide ``loadImage``,
                ``recognize``, ``configApi`` and ``cleanText``, because the
                button signals are connected to them below.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.layoutWidget = QtWidgets.QWidget(self.centralwidget)
        self.layoutWidget.setGeometry(QtCore.QRect(10, 10, 791, 541))
        self.layoutWidget.setObjectName("layoutWidget")
        # Horizontal layout: the button column first, then the text edit.
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.verticalLayout = QtWidgets.QVBoxLayout()
        self.verticalLayout.setObjectName("verticalLayout")
        self.opt = QtWidgets.QPushButton(self.layoutWidget)
        self.opt.setObjectName("opt")
        self.verticalLayout.addWidget(self.opt)
        self.start = QtWidgets.QPushButton(self.layoutWidget)
        self.start.setObjectName("start")
        self.verticalLayout.addWidget(self.start)
        # Label pinned to 100x100 (min == max), initially without text.
        self.plabel = QtWidgets.QLabel(self.layoutWidget)
        self.plabel.setEnabled(True)
        self.plabel.setMinimumSize(QtCore.QSize(100, 100))
        self.plabel.setMaximumSize(QtCore.QSize(100, 100))
        self.plabel.setText("")
        self.plabel.setObjectName("plabel")
        self.verticalLayout.addWidget(self.plabel)
        self.clean = QtWidgets.QPushButton(self.layoutWidget)
        self.clean.setObjectName("clean")
        self.verticalLayout.addWidget(self.clean)
        self.config = QtWidgets.QPushButton(self.layoutWidget)
        self.config.setObjectName("config")
        self.verticalLayout.addWidget(self.config)
        self.horizontalLayout.addLayout(self.verticalLayout)
        self.tedit = QtWidgets.QTextEdit(self.layoutWidget)
        self.tedit.setObjectName("tedit")
        self.horizontalLayout.addWidget(self.tedit)
        MainWindow.setCentralWidget(self.centralwidget)
        # Menu bar with a single menu, plus a status bar.
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 22))
        self.menubar.setObjectName("menubar")
        self.menufile = QtWidgets.QMenu(self.menubar)
        self.menufile.setObjectName("menufile")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.menubar.addAction(self.menufile.menuAction())

        self.retranslateUi(MainWindow)
        # Button wiring. The "clean" button is connected to three slots:
        # tedit.clear, plabel.clear and MainWindow.cleanText.
        self.opt.clicked.connect(MainWindow.loadImage)
        self.start.clicked.connect(MainWindow.recognize)
        self.config.clicked.connect(MainWindow.configApi)
        self.clean.clicked.connect(self.tedit.clear)
        self.clean.clicked.connect(self.plabel.clear)
        self.clean.clicked.connect(MainWindow.cleanText)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title, the four button texts and the menu title.

        Args:
            MainWindow: The window whose title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "My OCR"))
        self.opt.setText(_translate("MainWindow", "選擇圖檔"))
        self.start.setText(_translate("MainWindow", "開始識别"))
        self.clean.setText(_translate("MainWindow", "清空"))
        self.config.setText(_translate("MainWindow", "配置API Key"))
        self.menufile.setTitle(_translate("MainWindow", "file"))

           

運作界面:

百度API接口研究02-UI界面圖像OCR文字識别

exe程式及項目下載:https://download.csdn.net/download/mzl_18353516147/14028396

繼續閱讀