天天看點

Java字元集編碼與轉碼

Java字元的class代碼總是Unicode字元集的UTF-16編碼,運作時記憶體中的字元串在沒有指定編碼的時候也總是Unicode編碼。

Java編譯的時候,會將java檔案的内容按照指定的編碼(未指定時使用系統預設編碼)轉換為Unicode,並加載到記憶體中進行編譯。

下面給出一個Java轉碼工具,沒有測試過,呵呵:

package lavasoft.common; 

import org.apache.commons.logging.Log; 

import org.apache.commons.logging.LogFactory; 

import java.io.*; 

/** 

* 轉碼工具,全面支援檔案、字元串的轉碼 

* @author Administrator 2009-11-29 16:14:21 

*/ 

public class EncodingToolkit { 

        private static Log log = LogFactory.getLog(EncodingToolkit.class); 

        public static void main(String[] args) { 

                String han = "漢"; 

                System.out.println("---------"); 

        } 

        /** 

         * 對字元串重新編碼 

         * 

         * @param text                字元串 

         * @param resEncoding 源編碼 

         * @param newEncoding 新編碼 

         * @return 重新編碼後的字元串 

         */ 

        public static String reEncoding(String text, String resEncoding, String newEncoding) { 

                String rs = null; 

                try { 

                        rs = new String(text.getBytes(resEncoding), newEncoding); 

                } catch (UnsupportedEncodingException e) { 

                        log.error("讀取檔案為一個記憶體字元串失敗,失敗原因是使用了不支援的字元編碼"); 

                        throw new RuntimeException(e); 

                } 

                return rs; 

         * 重新編碼Unicode字元串 

         * @param text                源字元串 

         * @param newEncoding 新的編碼 

         * @return 指定編碼的字元串 

        public static String reEncoding(String text, String newEncoding) { 

                        rs = new String(text.getBytes(), newEncoding); 

                        log.error("讀取檔案為一個記憶體字元串失敗,失敗原因是使用了不支援的字元編碼" + newEncoding); 

         * 文本檔案重新編碼 

         * @param resFile         源檔案 

         * @param resEncoding 源檔案編碼 

         * @param distFile        目标檔案 

         * @param newEncoding 目标檔案編碼 

         * @return 轉碼成功時候傳回ture,否則false 

        public static boolean reEncoding(File resFile, String resEncoding, File distFile, String newEncoding) { 

                boolean flag = true; 

                InputStreamReader reader = null; 

                OutputStreamWriter writer = null; 

                        reader = new InputStreamReader(new FileInputStream(resFile), resEncoding); 

                        writer = new OutputStreamWriter(new FileOutputStream(distFile), newEncoding); 

                        char buf[] = new char[1024 * 64];         //字元緩沖區 

                        int len; 

                        while ((len = reader.read(buf)) != -1) { 

                                writer.write(buf, 0, len); 

                        } 

                        writer.flush(); 

                        writer.close(); 

                        reader.close(); 

                } catch (FileNotFoundException e) { 

                        flag = false; 

                        log.error("沒有找到檔案,轉碼發生異常!"); 

                } catch (IOException e) { 

                        log.error("讀取檔案為一個記憶體字元串失敗,失敗原因是讀取檔案異常!"); 

                } finally { 

                        if (reader != null) try { 

                                reader.close(); 

                        } catch (IOException e) { 

                                flag = false; 

                                throw new RuntimeException(e); 

                        } finally { 

                                if (writer != null) try { 

                                        writer.close(); 

                                } catch (IOException e) { 

                                        flag = false; 

                                        throw new RuntimeException(e); 

                                } 

                return flag; 

         * 讀取檔案為一個Unicode編碼的記憶體字元串,保持檔案原有的換行格式 

         * @param resFile    源檔案對象 

         * @param encoding 檔案字元集編碼 

         * @return 檔案内容的Unicode字元串 

        public static String file2String(File resFile, String encoding) { 

                StringBuffer sb = new StringBuffer(); 

                        LineNumberReader reader = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(resFile), encoding))); 

                        String line; 

                        while ((line = reader.readLine()) != null) { 

                                sb.append(line).append(System.getProperty("line.separator")); 

                        log.error("讀取檔案為一個記憶體字元串失敗,失敗原因是使用了不支援的字元編碼" + encoding); 

                        log.error("讀取檔案為一個記憶體字元串失敗,失敗原因所給的檔案" + resFile + "不存在!"); 

                return sb.toString(); 

         * 使用指定編碼讀取輸入流為一個記憶體Unicode字元串,保持檔案原有的換行格式 

         * @param in             輸入流 

         * @param encoding 建構字元流時候使用的字元編碼 

         * @return Unicode字元串 

        public static String stream2String(InputStream in, String encoding) { 

                LineNumberReader reader = null; 

                        reader = new LineNumberReader(new BufferedReader(new InputStreamReader(in, encoding))); 

                        in.close(); 

                        if (in != null) try { 

                                in.close(); 

                                log.error("關閉輸入流發生異常!", e); 

         * 字元串儲存為制定編碼的文本檔案 

         * @param text         字元串 

         * @param distFile 目标檔案 

         * @param encoding 目标檔案的編碼 

         * @return 轉換成功時候傳回ture,否則false 

        public static boolean string2TextFile(String text, File distFile, String encoding) { 

                if (!distFile.getParentFile().exists()) distFile.getParentFile().mkdirs(); 

                        writer = new OutputStreamWriter(new FileOutputStream(distFile), encoding); 

                        writer.write(text); 

                        log.error("将字元串寫入檔案發生異常!"); 

                        if (writer != null) try { 

                                writer.close(); 

                                log.error("關閉輸出流發生異常!", e); 

}

本文轉自 leizhimin 51CTO部落格,原文連結:http://blog.51cto.com/lavasoft/236392,如需轉載請自行聯系原作者