IME Develop

Development Platform:
Visual C++

converse.cpp：Code Content
							
#include "converse.h"

// Implementation of the CChineseCode class
 void CChineseCode::UnicodeToGB2312(char* pOut,wchar_t uData)  
 {  
   
     WideCharToMultiByte(936,0,&uData,2,pOut,sizeof(wchar_t),NULL,NULL); 
     pOut[2]=0;
     return;  
   
 }          
 void CChineseCode::Gb2312ToUnicode(wchar_t* pOut,char *gbBuffer)  
   
 {  
   
     ::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,pOut,1);  
   
     return ;  
   
 }  

 void CChineseCode::UTF_8ToUnicode(wchar_t* pOut,char *pText)  
 {  
   
     char* uchar = (char *)pOut;  
    
     uchar[1] = ((pText[0] & 0x0F) << 4) + ((pText[1] >> 2) & 0x0F);  
   
     uchar[0] = ((pText[1] & 0x03) << 6) + (pText[2] & 0x3F);  
   
     return;  
   
 }  
 void CChineseCode::UnicodeToUTF_8(char* pOut,wchar_t* pText)   
 {  
   
     // 注意 WCHAR高低字的顺序,低字节在前，高字节在后  
   
     char* pchar = (char *)pText;  
   
     pOut[0] = (0xE0 | ((pchar[1] & 0xF0) >> 4));  
   
     pOut[1] = (0x80 | ((pchar[1] & 0x0F) << 2)) + ((pchar[0] & 0xC0) >> 6);  
   
     pOut[2] = (0x80 | (pchar[0] & 0x3F));  

     pOut[3]=0;
     
     return;  
   
 }  
 void CChineseCode::GB2312ToUTF_8(char* pOut,char *pText, int pLen)   
 {  
     char buf[4];  
     int nLength = pLen* 3;  
     char* rst = new char[nLength];   
     memset(buf,0,4);    
     memset(rst,0,nLength);  
     int i = 0;  
     int j = 0;        
     while(i < pLen)  
     {  
         //如果是英文直接复制就可以  
         if( *(pText + i) >= 0)  
         {  
             rst[j++] = pText[i++];  
         }  
         else
         {
         wchar_t pbuffer;  
         Gb2312ToUnicode(&pbuffer,pText+i);  
         UnicodeToUTF_8(buf,&pbuffer); 
          unsigned short int tmp = 0;  
          tmp = rst[j] = buf[0];  
          tmp = rst[j+1] = buf[1];  
          tmp = rst[j+2] = buf[2];   
          j+=3;
          i+=2;
         }  
     }    
     rst[j] = 0;    
     //返回结果  
     pOut = rst;               
     delete []rst;     
     return;  
 }    
 void CChineseCode::UTF_8ToGB2312(char *pOut, char *pText, int pLen)  
 {  
     char * newBuf = new char[pLen];  
     char Ctemp[4];  
     memset(Ctemp,0,4);
     int i =0;  
     int j = 0;   
     while(i < pLen)   
     {     
         if(pText[i] > 0)   
         {    
                 newBuf[j++] = pText[i++];                         
            }  
            else                   
            {  
                    wchar_t Wtemp;  
                    UTF_8ToUnicode(&Wtemp,pText + i);  
                    UnicodeToGB2312(Ctemp,Wtemp);  
                 newBuf[j] = Ctemp[0];  
                 newBuf[j + 1] = Ctemp[1];  
                 i += 3;      
                 j += 2;     
         }  
     }  
     newBuf[j] = 0;  
     pOut = newBuf;  
     delete []newBuf; 
     return;   
 }    

 unsigned int CChineseCode::UNI2UTF(unsigned short uni)
{
	unsigned int utf8;
	if(uni < 0x80)
	{
		utf8 = uni;
		return utf8;
	}
	if(uni < 0x800)
	{
		utf8 = (0xc0 | (uni >> 6)) << 8
			| (0x80 | (uni & 0x3f));
		return utf8;
	}
	if(uni < 0x10000)
	{
		utf8 = (0xe0 | (uni>>12)) << 16
			| (0x80 | (uni >> 6 & 0x3f)) << 8
			| (0x80 | (uni & 0x3f));
		return utf8;
	}
	if(uni < 0x20000)
	{
		utf8 = (0xf0 | (uni >> 18)) << 24
			| (0x80 | (uni >> 12 & 0x3f)) << 16
			| (0x80 | (uni >> 6 & 0x3f)) << 8
			| (0x80 | (uni & 0x3f));
		return utf8;
	}
	else
	{
		//we don't deal with it, so we return the unicode.
		return uni;
	}
}
 int  CChineseCode::UTF2UNI(char *str, unsigned short* chPtr)
// str is the UTF8 next character pointer 
// chPtr is the int for the result 
{
  int byte;
  char *p;

  byte = *((unsigned char *) str);
  if (byte == '&')
    {
      int i, n = 0;

      byte = *((unsigned char *) (str + 1));
      if (byte == '#')
	{
          byte = *((unsigned char *) (str + 2));
          if (byte == 'x' || byte == 'X')
            {
              for (i = 3; i < 8; i++)
                {
                  byte = *((unsigned char *) (str + i));
                  if (byte >= 'A' && byte <= 'F')
                    byte = byte - 'A' + 10;
                  else if (byte >= 'a' && byte <= 'f')
                    byte = byte - 'a' + 10;
                  else if (byte >= '0' && byte <= '9')
                    byte = byte - '0';
                  else
                    break;
                  n = (n * 16) + byte;
                }
            }
          else
            {
	      for (i = 2; i < 8; i++)
	        {
	          byte = *((unsigned char *) (str + i));
	          if (byte >= '0' && byte <= '9')
	            n = (n * 10) + (byte - '0');
	          else
		    break;
		}
	    }
	  if (byte == ';')
	    {
	      *chPtr = (unsigned short) n;
	      return ++i;
	    }
	}
      else
        {
		*chPtr = 0;
		return 1;
        }
    }


  byte = *((unsigned char *) str);

  if (byte < 0xC0)
    {

      *chPtr = (unsigned short) byte;
      return 1;
    }
  else if (byte < 0xE0)
    {
      if ((str[1] & 0xC0) == 0x80)
	{


	  *chPtr = (unsigned short) (((byte & 0x1F) << 6) | (str[1] & 0x3F));
	  return 2;
	}

      *chPtr = (unsigned short) byte;
      return 1;
    }
  else if (byte < 0xF0)
    {
      if (((str[1] & 0xC0) == 0x80) && ((str[2] & 0xC0) == 0x80))
	{

	  *chPtr = (unsigned short) (((byte & 0x0F) << 12)
				  | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F));
	  return 3;
	}

      *chPtr = (unsigned short) byte;
      return 1;
    }

  *chPtr = (unsigned short) byte;
  return 1;
}


 unsigned short CChineseCode::UNI2MB(unsigned short usUNI)
{
    unsigned char  szGB[3]  = { 0 };
    unsigned short wzUNI[2] = { usUNI, 0 };
    unsigned short usGB;
    WideCharToMultiByte(
        CP_ACP,
        0, 
        wzUNI,  
        2, 
        (LPSTR)szGB,     
        2,
        0,
        0
    ); 
    usGB = (szGB[0] << 8) | szGB[1];    
    return usGB;
}
 unsigned short CChineseCode::GBK2UNI(unsigned short usGBK)
{
    unsigned char  szEUC[2] = { usGBK >> 8, usGBK & 0xFF };
    unsigned short usUNI;
    MultiByteToWideChar(        
        936,
        0, 
        (LPCSTR)szEUC,  
        2, 
        &usUNI,     
        1
    ); 
    return usUNI;   
}
 unsigned short CChineseCode::JIS2UNI(unsigned short usJIS)
{
    unsigned char  szEUC[2] = { (usJIS | 0x8080) >> 8, (usJIS | 0x8080) & 0xFF };
    unsigned short usUNI;
    MultiByteToWideChar(        
        20932,
        0, 
        (LPCSTR)szEUC,  
        2, 
        &usUNI,     
        1
    ); 
    return usUNI;   
}
 unsigned short CChineseCode::SJIS2UNI(unsigned short usSJIS)
{
    unsigned char  szEUC[2] = { usSJIS >> 8, usSJIS & 0xFF };
    unsigned short usUNI;
    MultiByteToWideChar(        
        932,
        0, 
        (LPCSTR)szEUC,  
        2, 
        &usUNI,     
        1
    ); 
    return usUNI;   
}
 unsigned short CChineseCode::UNI2GBK(unsigned short usUNI)
{
    unsigned char  szGBK[3]={0};
    unsigned short wzUNI[2] = { usUNI, 0 };
    unsigned short usGBK;
    WideCharToMultiByte(
        936,
        0, 
        wzUNI,  
        2, 
        (LPSTR)szGBK,     
        2,
        0,
        0
    ); 
    usGBK = (szGBK[0] << 8) | szGBK[1];    
    return usGBK;
}
 unsigned short CChineseCode::UNI2SJIS(unsigned short usUNI)
{
    unsigned char  szSJIS[3]  = { 0 };
    unsigned short wzUNI[2] = { usUNI, 0 };
    unsigned short usSJIS;
    WideCharToMultiByte(
        932,
        0, 
        wzUNI,  
        2, 
        (LPSTR)szSJIS,     
        2,
        0,
        0
    ); 
    usSJIS = (szSJIS[0] << 8) | szSJIS[1];    
    return usSJIS;
}
//实际返回的是EUC码 EUC码减去0x8080就是JIS码
unsigned short CChineseCode::UNI2JIS(unsigned short usUNI)
{
    unsigned char  szJIS[3]  = { 0 };
    unsigned short wzUNI[2] = { usUNI, 0 };
    unsigned short usJIS;
    WideCharToMultiByte(
        20932,
        0, 
        wzUNI,  
        2, 
        (LPSTR)szJIS,     
        2,
        0,
        0
    ); 
    usJIS = (szJIS[0] << 8) | szJIS[1];    
    usJIS-=0x8080;
    return usJIS;
}
 unsigned short CChineseCode::SJIS2JIS( unsigned short sjis )
{
    unsigned short ubyte, lbyte;
    
    
    if (((sjis >= 0x8140) && (sjis <= 0x9ffc)) ||
        ((sjis >= 0xe040) && (sjis <= 0xeffc))	)
    {
        ubyte = sjis >> 8;
        lbyte = sjis & 0x00ff;
        
        if ( (lbyte <= 0x3f) || (lbyte == 0x7f) || 
                (lbyte >= 0xfd) ) return 0;
        
        if ( ubyte >= 0xe0 ) ubyte -= 0xc0; else ubyte -= 0x80;
        ubyte = (ubyte << 1) + 0x1f;
        
        if ( lbyte >= 0x9f )
        {
            ubyte++;
            lbyte -= 0x7e;
        } else {
            if ( lbyte >= 0x80 ) lbyte--;
            lbyte -= 0x1f;
        }
        
        return ( ubyte << 8 ) + lbyte;
        
    } else {
        return 0;
    }
}

 unsigned short CChineseCode::JIS2SJIS( unsigned short jis )
{
    unsigned short     ubyte, lbyte;
    
    
    ubyte = jis >> 8;
    lbyte = jis & 0x00ff;
    
    lbyte += 0x1f;
    if ( lbyte >= 0x7f ) lbyte++;
    if ( lbyte <= 0x3f ) return 0;
    
    if ( (ubyte & 0x0001) == 0 )
    {
        lbyte = jis & 0x00ff;
        lbyte += 0x7e;
        ubyte--;
        if ( lbyte > 0xfd ) return 0;
    }
    
    ubyte -= 0x1f;
    ubyte = ubyte >> 1;
    ubyte += 0x80;
    if ( ubyte >= 0xa0 ) ubyte += 0x40;
    
    if ( ((ubyte >= 0x81) && (ubyte <= 0x9f)) ||
            ((ubyte >= 0xe0) && (ubyte <= 0xef)) )
    {
        return (ubyte << 8) + lbyte;
    } else {
        return 0;
    }
}