使用谷歌開源元件tesseract-OCR識別身份證(windows版本)

使用谷歌開源元件tesseract-OCR識別身份證(windows版本)

1,前期準備(軟體安裝)

下載tesseract-OCR,我下載的是3.02.02,注意各個版本之間相容性不太好,需要對應。

2,安裝tesseract-OCR,一路下一步即可,我安裝在預設路徑(這個路徑很重要,後面程式中需要用到)

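3,java程式碼實現識別方法一:通過呼叫系統命令完成識別,java程式碼參考如下。首先建立一個java工程,在lib目錄下匯入兩個jar包(記得build path):提供 org.jdesktop.swingx.util.OS 的 SwingX jar 包,以及提供 com.sun.media.imageio.plugins.tiff 的 JAI Image I/O jar 包。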

以下是我的測試目錄介紹

話不多說,上程式碼,親測可用(經過整合已經部署到公司專案)

package com.lcm.test;
import java.io.File;
import java.io.IOException;
/**
 * Small command-line driver that runs {@link OCR} on one image and prints the
 * recognized text to standard output.
 */
public class TestOcr {

    /** Image used when no path is given on the command line. */
    private static final String DEFAULT_IMAGE_PATH =
            "E://cxf//develop-workspace-new//testImageRecognition//src//2.jpg";

    /** Image format assumed when the file name has no extension. */
    private static final String DEFAULT_IMAGE_FORMAT = "jpg";

    /**
     * Recognizes the text of a single image.
     *
     * @param args optional; {@code args[0]} is the path of the image to
     *             recognize. When absent, {@link #DEFAULT_IMAGE_PATH} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_IMAGE_PATH;
        File imageFile = new File(path);
        try {
            String valCode = new OCR().recognizeText(imageFile, formatOf(imageFile));
            System.out.println(valCode);
        } catch (Exception e) {
            // recognizeText declares "throws Exception"; one handler covers
            // I/O failures as well as everything else.
            e.printStackTrace();
        }
    }

    /** Returns the file-name extension (without the dot), or "jpg" if there is none. */
    private static String formatOf(File imageFile) {
        String name = imageFile.getName();
        int dot = name.lastIndexOf('.');
        if (dot < 0 || dot == name.length() - 1) {
            return DEFAULT_IMAGE_FORMAT;
        }
        return name.substring(dot + 1);
    }
}

package com.lcm.test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.jdesktop.swingx.util.OS;
public class OCR {  
private final String LANG_OPTION = "-l";  //英文字母小寫l,並非數字1  
private final String EOL = System.getProperty("line.separator");  
//    private String tessPath = "C://Program Files//Tesseract-OCR";  
private String tessPath = "C://Program Files (x86)//Tesseract-OCR";  
//private String tessPath = new File("tesseract").getAbsolutePath();  
public String recognizeText(File imageFile,String imageFormat)throws Exception{  
File tempImage = ImageIOHelper.createImage(imageFile,imageFormat);  
File outputFile = new File(imageFile.getParentFile(),"output");  
StringBuffer strB = new StringBuffer();  
List<String> cmd = new ArrayList<>();  
if(OS.isWindowsXP()){  
cmd.add(tessPath "//tesseract");  
}else if(OS.isLinux()){  
cmd.add("tesseract");  
}else{  
cmd.add(tessPath "//tesseract");  
}  
cmd.add("");  
cmd.add(outputFile.getName());  
//cmd.add(LANG_OPTION); 
cmd.add("-l");
//        cmd.add("chi_sim");  
cmd.add("eng");  
ProcessBuilder pb = new ProcessBuilder();  
pb.directory(imageFile.getParentFile());  
cmd.set(1, tempImage.getName());  
pb.command(cmd);  
pb.redirectErrorStream(true);  
Process process = pb.start();  
//tesseract.exe 1.jpg 1 -l chi_sim  
int w = process.waitFor();  
//刪除臨時正在工作檔案  
tempImage.delete();  
if(w==0){  
BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath() ".txt"),"UTF-8"));  
String str;  
while((str = in.readLine())!=null){  
strB.append(str).append(EOL);  
}  
in.close();  
}else{  
String msg;  
switch(w){  
case 1:  
msg = "Errors accessing files.There may be spaces in your image's filename.";  
break;  
case 29:  
msg = "Cannot recongnize the image or its selected region.";  
break;  
case 31:  
msg = "Unsupported image format.";  
break;  
default:  
msg = "Errors occurred.";  
}  
tempImage.delete();  
//throw new RuntimeException(msg);  
}  
new File(outputFile.getAbsolutePath() ".txt").delete();  
return strB.toString();  
}  
}  

package com.lcm.test;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageOutputStream;
import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
/**
 * Helper that converts an image file into an uncompressed TIFF file, the
 * input format handed to tesseract.
 */
public class ImageIOHelper {

    /**
     * Converts an image file to an uncompressed TIFF file stored next to it.
     *
     * <p>The TIFF file is named after the source file with {@code "0"} appended
     * to the base name and the extension replaced by {@code "tif"}
     * (for example {@code 2.jpg} becomes {@code 20.tif}).
     *
     * @param imageFile   the source image
     * @param imageFormat the format name used to look up an image reader, e.g. {@code "jpg"}
     * @return the created TIFF file, or {@code null} if no reader/writer is
     *         available or reading/writing fails (the cause is printed to
     *         {@code System.err})
     */
    public static File createImage(File imageFile, String imageFormat) {
        File tempFile = null;
        ImageReader reader = null;
        ImageWriter writer = null;
        try {
            Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
            if (!readers.hasNext()) {
                throw new IOException("No image reader available for format: " + imageFormat);
            }
            reader = readers.next();

            Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
            if (!writers.hasNext()) {
                throw new IOException("No TIFF image writer is registered with ImageIO.");
            }
            writer = writers.next();

            // Ask the selected writer for its own parameter object so the
            // parameters always match the plugin that actually does the writing.
            ImageWriteParam tiffWriteParam = writer.getDefaultWriteParam();
            tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

            IIOMetadata streamMetadata;
            IIOImage image;
            try (ImageInputStream iis = ImageIO.createImageInputStream(imageFile)) {
                if (iis == null) {
                    throw new IOException("Cannot open image file: " + imageFile);
                }
                reader.setInput(iis);
                streamMetadata = reader.getStreamMetadata();
                BufferedImage bi = reader.read(0);
                image = new IIOImage(bi, null, reader.getImageMetadata(0));
            }

            tempFile = tempImageFile(imageFile);
            // A file-backed image output stream does not truncate an existing
            // file, so remove any stale file first.
            tempFile.delete();
            try (ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile)) {
                if (ios == null) {
                    throw new IOException("Cannot create temporary image file: " + tempFile);
                }
                writer.setOutput(ios);
                writer.write(streamMetadata, image, tiffWriteParam);
            }
            return tempFile;
        } catch (IOException e) {
            e.printStackTrace();
            // Do not hand a partially written file back to the caller.
            if (tempFile != null) {
                tempFile.delete();
            }
            return null;
        } finally {
            if (writer != null) {
                writer.dispose();
            }
            if (reader != null) {
                reader.dispose();
            }
        }
    }

    /**
     * Builds the temporary TIFF file for an image: same directory, base name
     * with "0" appended, extension "tif". A name without an extension simply
     * gets "0.tif" appended.
     */
    private static File tempImageFile(File imageFile) {
        String name = imageFile.getName();
        // Look for the dot in the file name only, never in a directory name.
        int dot = name.lastIndexOf('.');
        String baseName = dot >= 0 ? name.substring(0, dot) : name;
        return new File(imageFile.getParentFile(), baseName + "0.tif");
    }
}