天天看點

Tesseract OCR 中文批量訓練腳本

@echo off

rem Argument handling for the training script.
rem   arg 1  src        image file handed to tesseract, required
rem   arg 2  font_name  prefix used for the generated training files, required
rem   arg 3  desc       base name for the box and tr files, optional

rem The tilde form of each parameter drops surrounding double quotes, so the
rem variables hold bare values and the substring expansion further down works
rem on the real file name rather than on a trailing quote character.
set "src=%~1"
set "font_name=%~2"
set "desc=%~3"

rem Ask interactively for required values that were not given on the command line.
if not defined src set /p src=" please pass your filename : "
if not defined font_name set /p font_name=" please pass your font_name : "

rem Validate the arguments. exit /b ends only this script, with a non-zero
rem code, instead of closing the cmd session that called it.
if not defined src echo  IllegalArgumentException arg1 must not be null & pause>nul & exit /b 1
if not defined font_name echo  IllegalArgumentException arg2 must not be null & pause>nul & exit /b 1

rem Default base name: src without its last four characters.
rem NOTE(review): this assumes a dot plus a three-character extension such as .tif
if not defined desc set "desc=%src:~0,-4%"

 echo desc %desc%

rem Create font_properties in the current directory when it is missing.
rem The single line written is the font name followed by five zeros.
if not exist font_properties (
    ECHO  %font_name% 0 0 0 0 0  >"font_properties"
) else (
    echo  font_properties exist
)

rem Delete files left over from an earlier run for this font.
rem DEL receives exactly one quoted file name per call, so no unrelated file
rem in the working directory can be removed by accident.
for %%E in (unicharset inttemp pffmtable shapetable normproto font_properties) do if exist "%font_name%.%%E" echo DEL %font_name%.%%E & del /q "%font_name%.%%E"

rem Step 1, makebox: run tesseract on the image to produce the initial box file.
tesseract "%src%" "%desc%" -l chi_sim -psm 6 batch.nochop makebox

rem The later steps read the box file, so stop if it is not there.
if not exist "%desc%.box" echo  %desc%.box was not created, aborting & pause>nul & exit /b 1

rem Step 2: start jTessBoxEditor so the boxes can be corrected by hand.
java -Xms128m -Xmx512m -jar jTessBoxEditor/jTessBoxEditor.jar

ECHO Please change your results , and press any key to continue
pause>nul

rem Step 3, box.train: run tesseract again in training mode.
tesseract "%src%" "%desc%" -l chi_sim -psm 6 nobatch box.train

rem Every remaining tool takes the tr file as input, so stop if it is missing.
if not exist "%desc%.tr" echo  %desc%.tr was not created, aborting & pause>nul & exit /b 1

rem Step 4: run the training tools on the box and tr files.
unicharset_extractor "%desc%.box"
shapeclustering -F font_properties -U unicharset "%desc%.tr"
mftraining -F font_properties -U unicharset -O  unicharset "%desc%.tr"
cntraining "%desc%.tr"

rem Give each generated file that exists the font-name prefix, announcing
rem every rename before it is carried out.
for %%F in (unicharset inttemp pffmtable shapetable normproto) do if exist %%F ECHO rename %%F %font_name%.%%F &  rename %%F %font_name%.%%F

rem Run combine_tessdata on the font prefix; the trailing dot is part of the argument.
combine_tessdata %font_name%.

rem font_properties gets its prefix only after combine_tessdata has run.
if exist font_properties ECHO rename font_properties %font_name%.font_properties & rename font_properties %font_name%.font_properties

rem Keep the window open until a key is pressed.
ECHO  press any key to continue
pause>nul

版權聲明:本文為CSDN部落客「weixin_34288121」的原創文章,遵循CC 4.0 BY-SA版權協定,轉載請附上原文出處連結及本聲明。

原文連結:https://blog.csdn.net/weixin_34288121/article/details/91913667