Google搜索中文的时候在地址栏中会出现如:%E4%B8%AD%E5%9B%BD这样的字符串,在VC++中需要进行处理来获得相应的汉字。转换方法如下:
1、Remove every '%' from the string; what remains is the UTF-8 byte sequence written out as ASCII hex digits.
2、Use AsciiString2HexString() to convert the ASCII hex-digit string into the raw bytes it spells out (two digits per byte).
3、Use MultiByteToWideChar(CP_UTF8, 0, ...) to convert the UTF-8 byte string to a Unicode (UTF-16) string.
4、Use WideCharToMultiByte(CP_ACP, 0, ...) to convert the Unicode string to the system ANSI code page (GB2312/GBK on Simplified Chinese Windows).
实现:
// Example: decode a percent-encoded UTF-8 string into text in the system ANSI
// code page (CP_ACP), using three CString objects as scratch buffers.
// NOTE(review): the pointer casts and byte-based length arithmetic below look
// like they assume a non-UNICODE build where CString stores char — confirm
// before reusing this snippet in a UNICODE build.
CString strSource("%E4%B8%AD%E5%9B%BD");
CString strTarget;
CString strTemp;
int RetValue = 0;
1、
// Strip every '%' so that only the hex digits remain in strSource.
strSource.Remove('%');
2、
// Pack each pair of hex digits into one byte of strTemp. The buffer requested
// is GetLength()/2 + 1 bytes: one per digit pair plus the terminator that the
// helper appends.
// NOTE(review): the helper's BOOL result is discarded here, so malformed input
// is not detected at this step.
AsciiString2HexString(strSource.GetBuffer(strSource.GetLength()), strSource.GetLength(), (unsigned char*)strTemp.GetBuffer(strSource.GetLength()/2+1));
// Called with no length argument, so strTemp's new length depends on the
// terminator written by the helper (see the MFC ReleaseBuffer documentation).
strTemp.ReleaseBuffer();
3、
// UTF-8 bytes -> UTF-16, written into strTarget's buffer through a cast.
// The output capacity passed is strTemp.GetLength()/3 wide characters, i.e.
// the sizing assumes every character is a 3-byte UTF-8 sequence.
// NOTE(review): input containing 1-, 2- or 4-byte UTF-8 sequences needs more
// wide characters than this allows — verify against the expected input.
RetValue = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)strTemp.GetBuffer(strTemp.GetLength()), strTemp.GetLength(), (unsigned short*)strTarget.GetBuffer(strTemp.GetLength()*2/3), strTemp.GetLength()/3);
strTarget.ReleaseBuffer();
4、
// UTF-16 -> system ANSI code page, written back into strTemp. The wide
// character count is given as strTarget.GetLength()/2, which treats
// GetLength() as a byte count of the UTF-16 data.
// NOTE(review): RetValue from step 3 is overwritten here without having been
// checked; add a check after each call if failures must be reported.
RetValue = WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)strTarget.GetBuffer(strTarget.GetLength()), strTarget.GetLength()/2, strTemp.GetBuffer(strTarget.GetLength()+1), strTarget.GetLength()+1, NULL, NULL);
// Set strTemp's length explicitly to strTarget's current length.
strTemp.ReleaseBuffer(strTarget.GetLength());
// Hand the converted text back through strTarget.
strTarget = strTemp;
strTarget中存放的就是转换后获得的汉字。
注意:RetValue等于0表明转换过程失败,可以进行相应的错误处理,比如 return strTarget = ""; 另附录utf-8 Ascii码字符串转utf-8编码字符串的函数于下,以便参考。以上代码WindowsXP(SP2), VC++6.0环境下调试通过。引用本文请注明出处,谢谢!
附:
// Decodes a string of ASCII hexadecimal digits into the bytes it spells out.
//
// Every pair of digits in Asciistring becomes one byte of Hexstring, with the
// first digit of the pair in the high nibble. Both upper-case and lower-case
// digits ('A'-'F' and 'a'-'f') are accepted.
//
// Parameters:
//   Asciistring        - input digits; need not be null-terminated.
//   nAsciistringLength - number of characters to read from Asciistring.
//   Hexstring          - output buffer; must hold at least
//                        nAsciistringLength / 2 + 1 bytes.
//
// Returns TRUE when every character was a hex digit. Returns FALSE when either
// pointer is NULL or a non-hex character is met; in the latter case the bytes
// decoded so far are kept and terminated.
//
// Notes:
//   - A 0 byte is always appended after the decoded bytes, but a "00" pair
//     also decodes to 0, so the output is not necessarily a C string.
//   - If the length is odd, the trailing unpaired digit is discarded: the
//     terminator is written over the half-filled byte.
BOOL CCodeTranslationDlg::AsciiString2HexString(char* Asciistring, int nAsciistringLength, unsigned char* Hexstring)
{
    if (Asciistring == NULL || Hexstring == NULL)
    {
        return FALSE;
    }

    int i = 0;
    while (i < nAsciistringLength)
    {
        const char ch = Asciistring[i];
        unsigned char nTemp = 0;

        if (ch >= '0' && ch <= '9')
        {
            nTemp = static_cast<unsigned char>(ch - '0');
        }
        else if (ch >= 'A' && ch <= 'F')
        {
            nTemp = static_cast<unsigned char>(ch - 'A' + 10);
        }
        else if (ch >= 'a' && ch <= 'f')
        {
            nTemp = static_cast<unsigned char>(ch - 'a' + 10);
        }
        else
        {
            // Not a hex digit: terminate what has been decoded so far so the
            // caller never sees an unterminated buffer, then report failure.
            Hexstring[i / 2] = 0;
            return FALSE;
        }

        if (i % 2 == 0)
        {
            // First digit of a pair: start a new byte with the high nibble.
            Hexstring[i / 2] = static_cast<unsigned char>(nTemp << 4);
        }
        else
        {
            // Second digit of a pair: fill in the low nibble.
            Hexstring[i / 2] |= nTemp;
        }
        ++i;
    }

    // Set the null character at the end of the decoded bytes.
    Hexstring[i / 2] = 0;
    return TRUE;
}