天天看点

如何将Google搜索地址栏里的UTF-8字符串转换为汉字

Google搜索中文的时候在地址栏中会出现如:%E4%B8%AD%E5%9B%BD这样的字符串,在VC++中需要进行处理来获得相应的汉字。转换方法如下:

1、Get rid of the '%' characters in the string; what remains is the UTF-8 byte sequence written out as ASCII hexadecimal digits.

2、Use AsciiString2HexString() to convert that ASCII hex-digit string into the actual byte values.

3、Use the MultiByteToWideChar(CP_UTF8, 0, ...) function to convert the UTF-8 string to a Unicode string.

4、Use the WideCharToMultiByte(CP_ACP, 0, ...) function to convert the Unicode string to a GB2312 string.

实现:

 // Percent-encoded text as it appears in the address bar.
 CString strSource("%E4%B8%AD%E5%9B%BD");

 // Holds the intermediate wide-character data after step 3 and the final text after step 4.
 CString strTarget;

 // Scratch string reused by steps 2 and 4.
 CString strTemp;

 // Receives the return value of the conversion calls in steps 3 and 4.
 int RetValue = 0;

1、 

// Strip every '%' so that only the hexadecimal digits remain in strSource.
strSource.Remove('%');

2、

// Decode the hex digits in strSource into byte values written straight into strTemp's buffer.
// The buffer is requested at GetLength()/2+1 chars: one byte per digit pair plus a terminator.
// NOTE(review): the BOOL result is ignored and strSource's GetBuffer() has no matching
// ReleaseBuffer() in this snippet - confirm that is intended.
AsciiString2HexString(strSource.GetBuffer(strSource.GetLength()), strSource.GetLength(), (unsigned char*)strTemp.GetBuffer(strSource.GetLength()/2+1));

 // Hand the buffer back to strTemp; no explicit length is supplied.
 strTemp.ReleaseBuffer();

3、

 // Convert the UTF-8 bytes in strTemp to wide characters, using strTarget's buffer
 // (cast to unsigned short*) as the destination.
 // The destination is sized as GetLength()*2/3 chars and reported as GetLength()/3 wide
 // characters, i.e. the sizing assumes three UTF-8 bytes per output character.
 // NOTE(review): this presumably relies on a non-Unicode build where CString stores char - confirm.
 RetValue = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)strTemp.GetBuffer(strTemp.GetLength()), strTemp.GetLength(), (unsigned short*)strTarget.GetBuffer(strTemp.GetLength()*2/3), strTemp.GetLength()/3);

 // Hand the buffer back to strTarget; no explicit length is supplied.
 strTarget.ReleaseBuffer();

4、

 // Convert the wide characters held in strTarget's buffer to the CP_ACP code page,
 // writing into strTemp. The input length is passed as GetLength()/2 wide characters and
 // the output buffer is GetLength()+1 chars.
 // NOTE(review): sizing assumes each wide character yields at most two output bytes - confirm for the target code page.
 RetValue = WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)strTarget.GetBuffer(strTarget.GetLength()), strTarget.GetLength()/2, strTemp.GetBuffer(strTarget.GetLength()+1), strTarget.GetLength()+1, NULL, NULL);

 // Release strTemp's buffer, passing strTarget's length as the new length.
 strTemp.ReleaseBuffer(strTarget.GetLength()); 

// Copy the converted text back so that the result ends up in strTarget.
strTarget = strTemp;

strTarget中存放的就是转换后获得的汉字。

注意:RetValue等于0表明转换过程失败,可以进行相应的错误处理,比如 return strTarget = ""; 另附录UTF-8 ASCII码字符串转UTF-8编码字符串的函数于下,以便参考。以上代码WindowsXP(SP2), VC++6.0环境下调试通过。引用本文请注明出处,谢谢!

附:

BOOL CCodeTranslationDlg::AsciiString2HexString(char* Asciistring, int nAsciistringLength, unsigned char* Hexstring)

{

 BOOL bRet = TRUE;

 int i = 0;

 unsigned char nTemp = 0;

 while(i<nAsciistringLength)

 {

  switch(Asciistring[i])

  {

  case 'A':

   nTemp = 0xA;

   break;

  case 'B':

   nTemp = 0xB;

   break;

  case 'C':

   nTemp = 0xC;

   break;

  case 'D':

   nTemp = 0xD;

   break;

  case 'E':

   nTemp = 0xE;

   break;

  case 'F':

   nTemp = 0xF;

   break;

  case '0':

   nTemp = 0x0;

   break;

  case '1':

   nTemp = 0x1;

   break;

  case '2':

   nTemp = 0x2;

   break;

  case '3':

   nTemp = 0x3;

   break;

  case '4':

   nTemp = 0x4;

   break;

  case '5':

   nTemp = 0x5;

   break;

  case '6':

   nTemp = 0x6;

   break;

  case '7':

   nTemp = 0x7;

   break;

  case '8':

   nTemp = 0x8;

   break;

  case '9':

   nTemp = 0x9;

   break;

  default:

   //error occur

   bRet = FALSE;

   return bRet;

  }

  if(i%2 == 0)

  {

   Hexstring[i/2] = nTemp<<4;

  }

  else

  {

   Hexstring[i/2] |= nTemp;

  }

  i++;

 }

 //Set the null character to the end of string.

 Hexstring[i/2] = 0;

 return bRet;

}

继续阅读