天天看点

自动识别文件编码

public static Reader readTranslateFile(File bsFile) {

        final int BOM_SIZE = 4;

        byte[] bom = new byte[BOM_SIZE];

        BufferedReader reader = null;

        PushbackInputStream pushbackInputStream = null;

        try {

            // 获取编码

            String sourceEncoding = CpdetectorUtil.perceiveCode(bsFile);

            // 读取BOM

            pushbackInputStream = new PushbackInputStream(new FileInputStream(

                bsFile), BOM_SIZE);

            int n = pushbackInputStream.read(bom, 0, bom.length);

            int unread = n;

            // 判断是否有BOM

            unread = ishasBOM(bom, n, unread);

            // 读取偏移

            pushbackInputStream.unread(bom, (n - unread), unread);

            reader = new BufferedReader(new InputStreamReader(

                pushbackInputStream, sourceEncoding));

            return reader;

        } catch (MalformedURLException e) {

            Logger.getLogger().error(FileUtil.class, e);

        } catch (IOException e) {

            Logger.getLogger().error(FileUtil.class, e);

        }

        return null;

    }

 

 // Shared detector proxy that perceiveCode delegates to.
 private static final CodepageDetectorProxy detector = CodepageDetectorProxy

        .getInstance();

    // Registers four detectors on the proxy, in this order.
    // NOTE(review): the order presumably decides which detector is consulted
    // first, and the meaning of the `false` argument to ParsingDetector is
    // not visible here; confirm both against the cpdetector documentation.
    static {

        detector.add(new ParsingDetector(false));

        detector.add(JChardetFacade.getInstance());

        detector.add(UnicodeDetector.getInstance());

        detector.add(ASCIIDetector.getInstance());

    }

    /** Private constructor: prevents instantiation from outside this class. */
    private CpdetectorUtil() {

    }

   

    public static String perceiveCode(File file) throws IOException {

        if (null == file || !file.exists()) {

            return null;

        }

        Charset charset = null;

        charset = detector.detectCodepage(file.toURI().toURL());

        if (charset != null) {

            return charset.name();

        }

        return null;

    }

依赖的 jar 包：

cpdetector_1.0.10.jar

chardet-1.0.jar

antlr-2.7.4.jar

继续阅读