天天看點

Tess4J 圖片文字識別

/**
 * Demo entry point: preprocesses {@code jcaptcha.jpg} and runs Tess4J OCR on it.
 *
 * <p>Steps: read the image, convert it to grayscale, binarize it, enlarge it,
 * write the result to {@code jcaptcha0.jpg}, then run OCR on that file and print
 * the recognized text to standard output.
 *
 * @param args unused
 * @throws Exception if the input image cannot be read or decoded, the
 *         preprocessed image cannot be written, or OCR fails
 */
public static void main(String[] args) throws Exception {
    ITesseract instance = new Tesseract();

    File imageFile = new File("jcaptcha.jpg");
    BufferedImage bi = ImageIO.read(imageFile);
    // ImageIO.read returns null (rather than throwing) when no registered
    // reader can decode the file; fail with a clear message instead of an NPE.
    if (bi == null) {
        throw new java.io.IOException("Unsupported or corrupt image: " + imageFile.getAbsolutePath());
    }

    // Grayscale conversion. The original cropped to (0, 0, width, height) first,
    // i.e. the whole image, so that crop is omitted here.
    BufferedImage textImage = ImageHelper.convertImageToGrayscale(bi);

    // Binarize instead of sharpening. Author's note: in their use the main factor
    // hurting recognition was the broken strokes produced by dot-matrix printers,
    // and sharpening actually lowered the recognition rate.
    textImage = ImageHelper.convertImageToBinary(textImage);

    // Enlarge the image to improve recognition; per the author, many images that
    // cannot be recognized at native size are recognized easily once enlarged.
    // NOTE(review): the original comment said 5x while the code used 10x; the
    // runtime value (10) is kept here -- confirm which factor is intended.
    final int scaleFactor = 10;
    textImage = ImageHelper.getScaledInstance(
            textImage, bi.getWidth() * scaleFactor, bi.getHeight() * scaleFactor);

    File preprocessedFile = new File("jcaptcha0.jpg");
    // ImageIO.write returns false when no suitable writer exists; without this
    // check OCR would silently run on a missing or stale file.
    if (!ImageIO.write(textImage, "jpg", preprocessedFile)) {
        throw new java.io.IOException(
                "No JPEG writer available for image: " + preprocessedFile.getAbsolutePath());
    }

    String result = instance.doOCR(preprocessedFile);
    System.out.println(result);
}

繼續閱讀