GBK和UTF8編碼之間互相轉換的兩個函數

GBK和UTF8之間的轉換可以使用MultiByteToWideChar和WideCharToMultiByte兩個API，方法是先把來源字元串轉換為中間編碼Unicode（在Windows上即UTF-16的寬字元串），再由Unicode轉換為目標編碼即可。

#include <stdio.h>

#include <windows.h>

#include <vector>

// Converts a NUL-terminated GBK string to UTF-8 by going through UTF-16.
//
// Parameters:
//   lpGBKStr    - NUL-terminated GBK input. NULL is rejected.
//   lpUTF8Str   - output buffer, or NULL to only query the required size.
//   nUTF8StrLen - capacity of lpUTF8Str in bytes (ignored when lpUTF8Str is NULL).
//
// Returns the number of bytes required (query mode) or written (convert mode),
// in both cases counting the terminating NUL. Returns 0 when the input is NULL,
// a conversion step fails, or the output buffer is too small.
int GBKToUTF8(unsigned char * lpGBKStr,unsigned char * lpUTF8Str, int nUTF8StrLen)
{
    // Windows code page 936 is GBK. Naming it explicitly (instead of CP_ACP)
    // keeps the function correct when the system ANSI code page is not GBK.
    const unsigned int kCodePageGBK = 936;

    if (!lpGBKStr)
        return 0;

    const char* pszSource = reinterpret_cast<const char*>(lpGBKStr);

    // Size query: number of UTF-16 code units needed, terminating NUL included.
    const int nWideLen = ::MultiByteToWideChar(kCodePageGBK, 0, pszSource, -1, NULL, 0);
    if (nWideLen <= 0)
        return 0;

    // The vector owns the intermediate buffer, so every return path below
    // releases it automatically (the original leaked it on conversion failure).
    std::vector<wchar_t> wideBuf(nWideLen);
    if (!::MultiByteToWideChar(kCodePageGBK, 0, pszSource, -1, &wideBuf[0], nWideLen))
        return 0;

    // Size query: number of UTF-8 bytes needed, terminating NUL included.
    const int nRetLen = ::WideCharToMultiByte(CP_UTF8, 0, &wideBuf[0], -1, NULL, 0, NULL, NULL);

    // No output buffer: the caller only wants the required size.
    if (!lpUTF8Str)
        return nRetLen;

    // Conversion impossible or caller's buffer too small.
    if (nRetLen <= 0 || nUTF8StrLen < nRetLen)
        return 0;

    return ::WideCharToMultiByte(CP_UTF8, 0, &wideBuf[0], -1,
                                 reinterpret_cast<char*>(lpUTF8Str), nUTF8StrLen, NULL, NULL);
}

// Converts a NUL-terminated UTF-8 string to GBK by going through UTF-16.
//
// Parameters:
//   lpUTF8Str  - NUL-terminated UTF-8 input. NULL is rejected.
//   lpGBKStr   - output buffer, or NULL to only query the required size.
//   nGBKStrLen - capacity of lpGBKStr in bytes (ignored when lpGBKStr is NULL).
//
// Returns the number of bytes required (query mode) or written (convert mode),
// in both cases counting the terminating NUL. Returns 0 when the input is NULL,
// a conversion step fails, or the output buffer is too small.
int UTF8ToGBK(unsigned char * lpUTF8Str,unsigned char * lpGBKStr, int nGBKStrLen)
{
    // Windows code page 936 is GBK. Naming it explicitly (instead of CP_ACP)
    // keeps the function correct when the system ANSI code page is not GBK.
    const unsigned int kCodePageGBK = 936;

    if (!lpUTF8Str)
        return 0;

    const char* pszSource = reinterpret_cast<const char*>(lpUTF8Str);

    // Size query: number of UTF-16 code units needed, terminating NUL included.
    const int nWideLen = ::MultiByteToWideChar(CP_UTF8, 0, pszSource, -1, NULL, 0);
    if (nWideLen <= 0)
        return 0;

    // The vector owns the intermediate buffer, so every return path below
    // releases it automatically (the original leaked it on conversion failure).
    std::vector<wchar_t> wideBuf(nWideLen);
    if (!::MultiByteToWideChar(CP_UTF8, 0, pszSource, -1, &wideBuf[0], nWideLen))
        return 0;

    // Size query: number of GBK bytes needed, terminating NUL included.
    const int nRetLen = ::WideCharToMultiByte(kCodePageGBK, 0, &wideBuf[0], -1, NULL, 0, NULL, NULL);

    // No output buffer: the caller only wants the required size.
    if (!lpGBKStr)
        return nRetLen;

    // Conversion impossible or caller's buffer too small.
    if (nRetLen <= 0 || nGBKStrLen < nRetLen)
        return 0;

    return ::WideCharToMultiByte(kCodePageGBK, 0, &wideBuf[0], -1,
                                 reinterpret_cast<char*>(lpGBKStr), nGBKStrLen, NULL, NULL);
}

// Writes `len` bytes from `data` to the file at `path`, opened in binary mode.
// Returns true only when the file was opened, fully written and closed.
static bool SaveBytesToFile(const char* path, const char* data, size_t len)
{
    FILE* fp = fopen(path, "wb");
    if (!fp)
        return false;

    const bool written = (len == 0) || (fwrite(data, 1, len, fp) == len);

    // Always close the file; report failure if either step went wrong.
    return (fclose(fp) == 0) && written;
}

// Example usage of GBKToUTF8 / UTF8ToGBK: converts a GBK string to UTF-8 and
// back again, saving each result to a text file so its encoding can be
// inspected in an editor. Always returns 0.
int main()
{
    char cGBKStr[] = " 我是中國人! " ;

    // Query the size needed for the UTF-8 result (includes the terminating NUL).
    int nRetLen = GBKToUTF8(reinterpret_cast<unsigned char*>(cGBKStr), NULL, 0);
    printf( " 轉換後的字元串需要的空間長度為：%d " ,nRetLen);

    std::vector<char> utf8Buf(nRetLen + 1);
    nRetLen = GBKToUTF8(reinterpret_cast<unsigned char*>(cGBKStr),
                        reinterpret_cast<unsigned char*>(&utf8Buf[0]), nRetLen);
    if (!nRetLen)
    {
        printf( " GBKToUTF8轉換失敗！ " );
        return 0;
    }
    printf( " GBKToUTF8轉換成功！ " );

    // The returned length counts the terminating NUL; do not write it to the
    // text file.
    if (!SaveBytesToFile("C:/GBK轉UTF8.txt", &utf8Buf[0], nRetLen - 1))
        fprintf(stderr, "could not write C:/GBK轉UTF8.txt\n");

    // Pause so the file can be opened: in Notepad, "File" - "Save As" should
    // now show "UTF-8" in the encoding box.
    getchar();

    // Convert back: query the size needed for the GBK result first.
    nRetLen = UTF8ToGBK(reinterpret_cast<unsigned char*>(&utf8Buf[0]), NULL, 0);
    printf( " 轉換後的字元串需要的空間長度為：%d " ,nRetLen);

    std::vector<char> gbkBuf(nRetLen + 1);
    nRetLen = UTF8ToGBK(reinterpret_cast<unsigned char*>(&utf8Buf[0]),
                        reinterpret_cast<unsigned char*>(&gbkBuf[0]), nRetLen);
    if (!nRetLen)
    {
        printf( " UTF8ToGBK轉換失敗！ " );
        return 0;
    }
    printf( " UTF8ToGBK轉換成功！ " );

    if (!SaveBytesToFile("C:/UTF8轉GBK.txt", &gbkBuf[0], nRetLen - 1))
        fprintf(stderr, "could not write C:/UTF8轉GBK.txt\n");

    // Pause again: the encoding box should now show "ANSI" for the second file.
    getchar();

    return 0;
}

在網上看到的一些文章說，UTF8轉換為GBK的時候會有問題，特別是當UTF8字元串中的漢字數為奇數時。關於這個問題我沒有去驗證過，而且我對UTF8和GB2312的編碼還不是很熟悉，呵呵，等以後有空的時候再去了解一下編碼吧。

GBK和UTF8編碼之間互相轉換的兩個函數

繼續閱讀

與專家面對面：Android開發入門問與答

tabpanel 使用問題

轉換字元串成ＩＮＴ６４

linux網絡程式設計----發送與接收檔案

c寫檔案

對于0-1分數規劃的Dinkelbach算法的分析

處理PCX檔案

基于XOR的加密程式

不用iconv函數實作UTF-8編碼轉換GB2312的PHP函數

浮點數計算精度控制

CDHtmlDialog中阻止某頁面的顯示

Android – ListView 中添加按鈕，動态删除添加ItemView的操作

C++ 第十五周報告1--《冒泡法排序》

[轉]九大排序算法——C語言實作及詳解

無元件上傳圖檔到資料庫中，最完整解決方案

QR碼編碼原理三（日本漢字和中文編碼）