Code/Resource
Windows Develop
Linux-Unix program
Internet-Socket-Network
Web Server
Browser Client
Ftp Server
Ftp Client
Browser Plugins
Proxy Server
Email Server
Email Client
WEB Mail
Firewall-Security
Telnet Server
Telnet Client
ICQ-IM-Chat
Search Engine
Sniffer Package capture
Remote Control
xml-soap-webservice
P2P
WEB(ASP,PHP,...)
TCP/IP Stack
SNMP
Grid Computing
SilverLight
DNS
Cluster Service
Network Security
Communication-Mobile
Game Program
Editor
Multimedia program
Graph program
Compiler program
Compress-Decompress algorithms
Crypt_Decrypt algorithms
Mathematics-Numerical algorithms
MultiLanguage
Disk/Storage
Java Develop
assembly language
Applications
Other systems
Database system
Embedded-SCM Develop
FlashMX/Flex
source in ebook
Delphi VCL
OS Develop
MiddleWare
MPI
MacOS develop
LabView
ELanguage
Software/Tools
E-Books
Article/Document
encoding.cpp
Package: shell.rar [view]
Upload User: xhy777
Upload Date: 2007-02-14
Package Size: 24088k
Code Size: 10k
Category:
Windows Kernel
Development Platform:
Visual C++
- /*****************************************************************************
- FILE: encoding.cpp
- DESCRIPTION:
- Handle taking internet strings by detecting if they are UTF-8 encoded
- or DBCS and finding out what code page was used.
- *****************************************************************************/
- #include "priv.h"
- #include "util.h"
- #include "ftpurl.h"
- #include "statusbr.h"
- #include <commctrl.h>
- #include <shdocvw.h>
- /*****************************************************************************
- CLASS: CMultiLanguageCache
- *****************************************************************************/
- HRESULT CMultiLanguageCache::_Init(void)
- {
- if (m_pml2)
- return S_OK;
- return CoCreateInstance(CLSID_CMultiLanguage, NULL, CLSCTX_INPROC_SERVER, IID_IMultiLanguage2, (void **) &m_pml2);
- }
- /*****************************************************************************
- CLASS: CWireEncoding
- *****************************************************************************/
- CWireEncoding::CWireEncoding(void)
- {
- // We can go on the stack, so we may not be zero inited.
- m_nConfidence = 0;
- m_uiCodePage = CP_ACP; //
- m_dwMode = 0;
- m_fUseUTF8 = FALSE;
- }
- CWireEncoding::~CWireEncoding(void)
- {
- }
- void CWireEncoding::_ImproveAccuracy(CMultiLanguageCache * pmlc, LPCWIRESTR pwStr, BOOL fUpdateCP, UINT * puiCodePath)
- {
- DetectEncodingInfo dei = {0};
- INT nStructs = 1;
- INT cchSize = lstrlenA(pwStr);
- IMultiLanguage2 * pml2 = pmlc->GetIMultiLanguage2();
- // Assume we will use the normal code page.
- *puiCodePath = m_uiCodePage;
- if (S_OK == pml2->DetectInputCodepage(MLDETECTCP_8BIT, CP_AUTO, (LPWIRESTR)pwStr, &cchSize, &dei, (INT *)&nStructs))
- {
- // Is it UTF8 or just plain ansi(CP_20127)?
- if (((CP_UTF_8 == dei.nCodePage) || (CP_20127 == dei.nCodePage)) &&
- (dei.nConfidence > 70))
- {
- // Yes, so make sure the caller uses UTF8 to decode but don't update
- // the codepage.
- *puiCodePath = CP_UTF_8;
- }
- else
- {
- if (fUpdateCP && (dei.nConfidence > m_nConfidence))
- {
- m_uiCodePage = dei.nCodePage;
- m_nConfidence = dei.nConfidence;
- }
- }
- }
- }
- HRESULT CWireEncoding::WireBytesToUnicode(CMultiLanguageCache * pmlc, LPCWIRESTR pwStr, DWORD dwFlags, LPWSTR pwzDest, DWORD cchSize)
- {
- HRESULT hr;
- // Optimize for the fast common case.
- if (Is7BitAnsi(pwStr))
- {
- pwzDest[0] = 0;
- SHAnsiToUnicodeCP(CP_UTF_8, pwStr, pwzDest, cchSize);
- hr = S_OK;
- }
- else
- {
- #ifdef FEATURE_CP_AUTODETECT
- if (this)
- {
- CMultiLanguageCache mlcTemp;
- UINT cchSizeTemp = cchSize;
- UINT uiCodePageToUse;
- if (!pmlc)
- pmlc = &mlcTemp;
- if (!pmlc || !pmlc->GetIMultiLanguage2())
- return E_FAIL;
- IMultiLanguage2 * pml2 = pmlc->GetIMultiLanguage2();
- _ImproveAccuracy(pmlc, pwStr, (WIREENC_IMPROVE_ACCURACY & dwFlags), &uiCodePageToUse);
- if (CP_ACP == uiCodePageToUse)
- uiCodePageToUse = GetACP();
- UINT cchSrcSize = lstrlenA(pwStr) + 1; // The need to do the terminator also.
- hr = pml2->ConvertStringToUnicode(&m_dwMode, uiCodePageToUse, (LPWIRESTR)pwStr, &cchSrcSize, pwzDest, &cchSizeTemp);
- if (!(EVAL(S_OK == hr)))
- SHAnsiToUnicode(pwStr, pwzDest, cchSize);
- }
- else
- #endif // FEATURE_CP_AUTODETECT
- {
- UINT uiCodePage = ((WIREENC_USE_UTF8 & dwFlags) ? CP_UTF_8 : CP_ACP);
- SHAnsiToUnicodeCP(uiCodePage, pwStr, pwzDest, cchSize);
- }
- }
- return hr;
- }
- HRESULT CWireEncoding::UnicodeToWireBytes(CMultiLanguageCache * pmlc, LPCWSTR pwzStr, DWORD dwFlags, LPWIRESTR pwDest, DWORD cchSize)
- {
- HRESULT hr = S_OK;
- #ifdef FEATURE_CP_AUTODETECT
- CMultiLanguageCache mlcTemp;
- DWORD dwCodePage = CP_UTF_8;
- DWORD dwModeTemp = 0;
- DWORD * pdwMode = &dwModeTemp;
- UINT cchSizeTemp = cchSize;
- // In some cases, we don't know the site, so we use this.
- // BUGBUG: Come back and force this to be set.
- if (this)
- {
- dwCodePage = m_uiCodePage;
- pdwMode = &m_dwMode;
- }
- if (!pmlc)
- pmlc = &mlcTemp;
- if (!pmlc)
- return E_FAIL;
- IMultiLanguage2 * pml2 = pmlc->GetIMultiLanguage2();
- // if (WIREENC_USE_UTF8 & dwFlags)
- // dwCodePage = CP_UTF_8;
- UINT cchSrcSize = lstrlenW(pwzStr) + 1; // The need to do the terminator also.
- if (CP_ACP == dwCodePage)
- dwCodePage = GetACP();
- hr = pml2->ConvertStringFromUnicode(pdwMode, dwCodePage, (LPWSTR) pwzStr, &cchSrcSize, pwDest, &cchSizeTemp);
- if (!(EVAL(S_OK == hr)))
- SHUnicodeToAnsi(pwzStr, pwDest, cchSize);
- #else // FEATURE_CP_AUTODETECT
- UINT nCodePage = ((WIREENC_USE_UTF8 & dwFlags) ? CP_UTF_8 : CP_ACP);
- SHUnicodeToAnsiCP(nCodePage, pwzStr, pwDest, cchSize);
- #endif // FEATURE_CP_AUTODETECT
- return hr;
- }
- HRESULT CWireEncoding::ReSetCodePages(CMultiLanguageCache * pmlc, CFtpPidlList * pFtpPidlList)
- {
- CMultiLanguageCache mlcTemp;
- if (!pmlc)
- pmlc = &mlcTemp;
- if (!pmlc)
- return E_FAIL;
- // BUGBUG/TODO:
- return S_OK;
- }
- HRESULT CWireEncoding::CreateFtpItemID(CMultiLanguageCache * pmlc, LPFTP_FIND_DATA pwfd, LPITEMIDLIST * ppidl)
- {
- CMultiLanguageCache mlcTemp;
- WCHAR wzDisplayName[MAX_PATH];
- if (!pmlc)
- pmlc = &mlcTemp;
- WireBytesToUnicode(pmlc, pwfd->cFileName, (m_fUseUTF8 ? WIREENC_USE_UTF8 : WIREENC_NONE), wzDisplayName, ARRAYSIZE(wzDisplayName));
- return FtpItemID_CreateReal(pwfd, wzDisplayName, ppidl);
- }
- HRESULT CWireEncoding::ChangeFtpItemIDName(CMultiLanguageCache * pmlc, LPCITEMIDLIST pidlBefore, LPCWSTR pwzNewName, BOOL fUTF8, LPITEMIDLIST * ppidlAfter)
- {
- CMultiLanguageCache mlcTemp;
- WIRECHAR wWireName[MAX_PATH];
- HRESULT hr;
- if (!pmlc)
- pmlc = &mlcTemp;
- hr = UnicodeToWireBytes(pmlc, pwzNewName, (fUTF8 ? WIREENC_USE_UTF8 : WIREENC_NONE), wWireName, ARRAYSIZE(wWireName));
- if (EVAL(SUCCEEDED(hr)))
- hr = FtpItemID_CreateWithNewName(pidlBefore, pwzNewName, wWireName, ppidlAfter);
- return hr;
- }
- BOOL SHIsUTF8Encoded(LPCWIRESTR pszIsUTF8)
- {
- unsigned int len = lstrlenA(pszIsUTF8);
- LPCWIRESTR endbuf = pszIsUTF8 + len;
- unsigned char byte2mask = 0x00;
- unsigned char c;
- int trailing = 0; // trailing (continuation) bytes to follow
- while (pszIsUTF8 != endbuf)
- {
- c = *pszIsUTF8++;
- if (trailing)
- {
- if ((c & 0xC0) == 0x80) // Does trailing byte follow UTF-8 format?
- {
- if (byte2mask) // Need to check 2nd byte for proper range?
- {
- if (c & byte2mask) // Are appropriate bits set?
- byte2mask=0x00;
- else
- return 0;
- trailing--;
- }
- }
- else
- return FALSE;
- }
- else
- {
- if ((c & 0x80) == 0x00)
- continue; // valid 1 byte UTF-8
- else
- {
- if ((c & 0xE0) == 0xC0) // valid 2 byte UTF-8
- {
- if (c & 0x1E) // Is UTF-8 byte in proper range?
- {
- trailing =1;
- }
- else
- return FALSE;
- }
- else
- {
- if ((c & 0xF0) == 0xE0) // valid 3 byte UTF-8
- {
- if (!(c & 0x0F)) // Is UTF-8 byte in proper range?
- byte2mask=0x20; // If not set mask to check next byte
- trailing = 2;
- }
- else
- {
- if ((c & 0xF8) == 0xF0) // valid 4 byte UTF-8
- {
- if (!(c & 0x07)) // Is UTF-8 byte in proper range?
- byte2mask=0x30; // If not set mask to check next byte
- trailing = 3;
- }
- else
- {
- if ((c & 0xFC) == 0xF8) // valid 5 byte UTF-8
- {
- if (!(c & 0x03)) // Is UTF-8 byte in proper range?
- byte2mask=0x38; // If not set mask to check next byte
- trailing = 4;
- }
- else
- {
- if ((c & 0xFE) == 0xFC) // valid 6 byte UTF-8
- {
- if (!(c & 0x01)) // Is UTF-8 byte in proper range?
- byte2mask=0x3C; // If not set mask to check next byte
- trailing = 5;
- }
- else
- return FALSE;
- }
- }
- }
- }
- }
- }
- }
- return (trailing == 0);
- }