天天看點

Java使用OpenCV和Tesseract-OCR實作銀行卡圖檔處理與卡号識别

直接上代碼,每一步都附有解釋與插圖,一步步實作。如果不清楚 OpenCV 的環境如何搭建,可上網查或者參見我的前幾篇部落格。Java 代碼如下:

package com.zmx.opencvtest;


import org.opencv.core.*;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;
import java.io.File;
import java.io.IOException;

/**
 * Created by zhangwenchao on 2017/9/27.
 */
public class FirstOpenCVTest {

    /** Photo of the bank card that is read as input. */
    private static final String SOURCE_IMAGE_PATH = "E:/srcImage.jpg";
    /** Where the grayscale intermediate image is written. */
    private static final String GRAY_IMAGE_PATH = "E:/grayImage.jpg";
    /** Where the thresholded (black/white) intermediate image is written. */
    private static final String BINARY_IMAGE_PATH = "E:/binaryImage.jpg";
    /** Where the eroded intermediate image is written. */
    private static final String ERODED_IMAGE_PATH = "E:/destImage.jpg";
    /** Where the cropped band is written; this file is then handed to the OCR step. */
    private static final String CROPPED_IMAGE_PATH = "E:/resImage.jpg";

    /** Gray levels above this value become white (255), everything else becomes black (0). */
    private static final int BINARY_THRESHOLD = 20;
    /** Number of black pixels used as the cut-off when deciding where the band starts and ends. */
    private static final int DARK_PIXELS_PER_ROW = 150;

    static {
        // The JVM has to be started with a VM option that points at the directory containing the
        // OpenCV native library, for example:
        //   -Djava.library.path=$PROJECT_DIR$\opencv\x64
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
    }

    /**
     * Runs the whole pipeline: load the image, convert it to grayscale, threshold it, erode it,
     * crop the first horizontal band that is rich in black pixels, and pass the crop to
     * {@code TesseractOCRUtil.recognizeText}. Every intermediate image is saved as a JPEG.
     *
     * @param args unused
     * @throws IllegalStateException if the source image cannot be read or no band is found
     */
    public static void main(String[] args) throws InterruptedException {

        // 1. Read the source image into an OpenCV Mat. imread signals failure with an empty Mat,
        //    so check it here instead of failing later inside cvtColor.
        Mat srcMat = Imgcodecs.imread(SOURCE_IMAGE_PATH);
        if (srcMat.empty()) {
            throw new IllegalStateException("Cannot read source image: " + SOURCE_IMAGE_PATH);
        }

        // 2. Convert to grayscale. imread stores colour images in B, G, R channel order, so the
        //    BGR conversion code is the one that applies the channel weights correctly.
        Mat grayMat = new Mat();
        Imgproc.cvtColor(srcMat, grayMat, Imgproc.COLOR_BGR2GRAY);
        saveJpgImage(toBufferedImage(grayMat), GRAY_IMAGE_PATH);
        System.out.println("儲存灰階圖像!");

        // 3. Binarize the grayscale image.
        Mat binaryMat = new Mat();
        Imgproc.threshold(grayMat, binaryMat, BINARY_THRESHOLD, 255, Imgproc.THRESH_BINARY);
        saveJpgImage(toBufferedImage(binaryMat), BINARY_IMAGE_PATH);
        System.out.println("儲存二值化圖像!");

        // 4. Erode with a 3x3 rectangular kernel. Erosion takes the neighbourhood minimum, so the
        //    black strokes become wider and thicker, which helps recognition.
        Mat destMat = new Mat();
        Mat element = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3, 3));
        Imgproc.erode(binaryMat, destMat, element);
        saveJpgImage(toBufferedImage(destMat), ERODED_IMAGE_PATH);
        System.out.println("儲存腐蝕化後圖像!");

        // 5. Crop the band of rows that contains the dark pixels.
        int[] band = findDarkBand(destMat, DARK_PIXELS_PER_ROW);
        if (band == null) {
            throw new IllegalStateException(
                    "No row with at least " + DARK_PIXELS_PER_ROW + " black pixels was found");
        }
        int top = band[0];
        int bottom = band[1];
        System.out.println("過濾上界" + Integer.toString(top));
        System.out.println("過濾下界" + Integer.toString(bottom));

        // Rect arguments: x, y, width, height.
        Rect rect = new Rect(0, top, destMat.width(), bottom - top);
        Mat resMat = new Mat(destMat, rect);
        saveJpgImage(toBufferedImage(resMat), CROPPED_IMAGE_PATH);
        System.out.println("儲存切割後圖像!");

        // 6. Recognize the digits in the cropped image.
        try {
            String result = TesseractOCRUtil.recognizeText(new File(CROPPED_IMAGE_PATH));
            System.out.println(result);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Scans the rows of a single-channel 8-bit image from top to bottom and locates the first
     * horizontal band that is rich in black (value 0) pixels.
     *
     * <p>The band starts at the first row holding at least {@code darkPixelCutoff} black pixels
     * and ends at the first later row holding at most {@code darkPixelCutoff} black pixels.
     * Scanning stops as soon as both edges are known.
     *
     * @param binary          single-channel 8-bit image to scan
     * @param darkPixelCutoff black-pixel count that separates band rows from background rows
     * @return {@code {top, bottom}} with {@code top} inclusive and {@code bottom} exclusive, or
     *         {@code null} when no row reaches the cut-off; when the band runs to the last row,
     *         {@code bottom} is the image height
     */
    private static int[] findDarkBand(Mat binary, int darkPixelCutoff) {
        int height = binary.height();
        // One whole row is fetched per native call instead of one call per pixel.
        byte[] rowPixels = new byte[binary.width()];
        int top = -1;

        for (int y = 0; y < height; y++) {
            binary.get(y, 0, rowPixels);
            int darkCount = 0;
            for (byte pixel : rowPixels) {
                if (pixel == 0) {
                    darkCount++;
                }
            }

            if (top < 0) {
                if (darkCount >= darkPixelCutoff) {
                    top = y; // upper edge of the band
                }
            } else if (darkCount <= darkPixelCutoff) {
                return new int[] {top, y}; // lower edge found, stop scanning
            }
        }

        if (top < 0) {
            return null;
        }
        // The band never dropped below the cut-off again, so it extends to the bottom edge.
        return new int[] {top, height};
    }

    /**
     * Converts an OpenCV {@code Mat} into a {@code BufferedImage} by copying its raw bytes.
     *
     * <p>A single-channel matrix becomes {@code TYPE_BYTE_GRAY}; any matrix with more than one
     * channel is copied into a {@code TYPE_3BYTE_BGR} image.
     *
     * @param matrix image data to convert
     * @return a new image holding a copy of the matrix pixels
     */
    private static BufferedImage toBufferedImage(Mat matrix) {
        int type = BufferedImage.TYPE_BYTE_GRAY;
        if (matrix.channels() > 1) {
            type = BufferedImage.TYPE_3BYTE_BGR;
        }
        int bufferSize = matrix.channels() * matrix.cols() * matrix.rows();
        byte[] buffer = new byte[bufferSize];
        matrix.get(0, 0, buffer); // fetch every pixel in one call
        BufferedImage image = new BufferedImage(matrix.cols(), matrix.rows(), type);
        final byte[] targetPixels = ((DataBufferByte) image.getRaster().getDataBuffer()).getData();
        System.arraycopy(buffer, 0, targetPixels, 0, buffer.length);
        return image;
    }

    /**
     * Writes an in-memory image to disk in JPEG format.
     *
     * @param image    image to save
     * @param filePath destination file name
     * @throws RuntimeException if writing fails or no JPEG writer accepts the image
     */
    private static void saveJpgImage(BufferedImage image, String filePath) {
        try {
            // ImageIO.write reports "no suitable writer" by returning false rather than throwing.
            if (!ImageIO.write(image, "jpg", new File(filePath))) {
                throw new RuntimeException("No JPEG writer available for image: " + filePath);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
           

對于圖檔識别,我單獨寫了一個工具類(上面的代碼已經引用了它),Java 代碼如下:

package com.zmx.opencvtest;

/**
 * Created by zhangwenchao on 2017/9/28.
 */


import java.io.BufferedReader;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

public class TesseractOCRUtil{

    /** System property that may be set to override the location of the Tesseract executable. */
    private static final String EXECUTABLE_PROPERTY = "tesseract.path";

    /** Executable used when {@link #EXECUTABLE_PROPERTY} is not set. */
    private static final String DEFAULT_EXECUTABLE = "D:/Program Files (x86)/Tesseract-OCR/tesseract.exe";

    /**
     * Runs the Tesseract command-line program on an image and returns the recognized text with
     * every whitespace character removed.
     *
     * <p>Tesseract is asked to write to a file named {@code output} in the image's directory; the
     * resulting {@code output.txt} is read as UTF-8 and deleted afterwards, whether or not
     * recognition succeeded. Because that name is fixed, concurrent calls on images in the same
     * directory would overwrite each other's result.
     *
     * @param imageFile the image file to recognize
     * @return the recognized text without any whitespace
     * @throws RuntimeException if Tesseract exits with a non-zero code
     * @throws Exception        if the process cannot be started, the result file cannot be read,
     *                          or the calling thread is interrupted while waiting
     */
    public static String recognizeText(File imageFile) throws Exception {
        // Tesseract appends ".txt" to the output base name it is given.
        File outputFile = new File(imageFile.getParentFile(), "output");
        File outputText = new File(outputFile.getAbsolutePath() + ".txt");

        try {
            int exitCode = runTesseract(imageFile, outputFile);
            if (exitCode != 0) { // 0 means normal termination
                throw new RuntimeException(describeExitCode(exitCode));
            }

            StringBuilder text = new StringBuilder();
            try (BufferedReader in = new BufferedReader(new InputStreamReader(
                    new FileInputStream(outputText), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    text.append(line);
                }
            }
            return text.toString().replaceAll("\\s+", "");
        } finally {
            // Remove the temporary result on success and on failure alike.
            outputText.delete();
        }
    }

    /**
     * Starts Tesseract and waits for it to finish.
     *
     * @param imageFile  image passed as the first argument
     * @param outputFile output base name passed as the second argument
     * @return the exit code of the process
     */
    private static int runTesseract(File imageFile, File outputFile) throws Exception {
        ProcessBuilder builder = new ProcessBuilder(
                System.getProperty(EXECUTABLE_PROPERTY, DEFAULT_EXECUTABLE),
                imageFile.getPath(),
                outputFile.getPath());
        // Merge stderr into stdout so a single reader can drain everything the process prints.
        builder.redirectErrorStream(true);
        Process pro = builder.start();

        try {
            // The output must be consumed: if the pipe buffer fills up, the child blocks on
            // writing and waitFor() would never return.
            byte[] scratch = new byte[4096];
            try (java.io.InputStream processOutput = pro.getInputStream()) {
                while (processOutput.read(scratch) != -1) {
                    // discard
                }
            }
            return pro.waitFor();
        } catch (InterruptedException e) {
            pro.destroy();
            Thread.currentThread().interrupt(); // keep the interrupt visible to callers
            throw e;
        }
    }

    /**
     * Maps a non-zero Tesseract exit code to the error message reported to the caller.
     *
     * @param exitCode exit code returned by the process
     * @return message for the thrown exception
     */
    private static String describeExitCode(int exitCode) {
        switch (exitCode) {
            case 1:
                return "Errors accessing files. There may be spaces in your image's filename.";
            case 29:
                return "Cannot recognize the image or its selected region.";
            case 31:
                return "Unsupported image format.";
            default:
                return "Errors occurred.";
        }
    }

    /**
     * Small manual check: recognizes {@code E:/resImage.jpg} and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            String result = recognizeText(new File("E:/resImage.jpg"));
            System.out.println(result);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

           

運作結果如下:

1、原始圖像(網上不知哪位仁兄的銀行卡):

Java使用OpenCV和Tesseract-OCR實作銀行卡圖檔處理與卡号識别

2、儲存灰階圖像!

Java使用OpenCV和Tesseract-OCR實作銀行卡圖檔處理與卡号識别

3、儲存二值化圖像!

Java使用OpenCV和Tesseract-OCR實作銀行卡圖檔處理與卡号識别

4、儲存腐蝕化後圖像!

Java使用OpenCV和Tesseract-OCR實作銀行卡圖檔處理與卡号識别

5、擷取的截取圖像的上下邊界

       過濾上界386

       過濾下界447

6、儲存切割後圖像!

Java使用OpenCV和Tesseract-OCR實作銀行卡圖檔處理與卡号識别

7、識别的卡号:

         6228482298797273578

搞了一天的時間,總算大功告成,效果還不錯!