cachesrch.cpp
Upload User: xhy777
Upload Date: 2007-02-14
Package Size: 24088k
Code Size: 8k
Category:

Windows Kernel

Development Platform:

Visual C++

  1. /**********************************************************************
  2.   Cache Search Stuff (simple strstr)
  3.   Marc Miller (t-marcmi) - 1998
  4.  **********************************************************************/
  5. #include "cachesrch.h"
  6. DWORD CacheSearchEngine::CacheStreamWrapper::s_dwPageSize = 0;
  7. BOOL  CacheSearchEngine::CacheStreamWrapper::_ReadNextBlock() {
  8.     if (_fEndOfFile)
  9.         return FALSE;
  10.     if (!s_dwPageSize) {
  11.         SYSTEM_INFO sysInfo;
  12.         GetSystemInfo(&sysInfo);
  13.         s_dwPageSize = sysInfo.dwPageSize;
  14.     }
  15.     BOOL fNewRead = FALSE; // is this our first look at this file?
  16.     if (!_pbBuff) {
  17.         // Allocate a page of memory
  18.         // Note: find out why this returned error code #87
  19.         //_pbBuff  = (LPBYTE)(VirtualAlloc(NULL, s_dwPageSize, MEM_COMMIT, PAGE_READWRITE));
  20.         _pbBuff = (LPBYTE)(LocalAlloc(LPTR, s_dwPageSize));
  21.         if (!_pbBuff) {
  22.             //DWORD dwError = GetLastError();
  23.             return FALSE;
  24.         }
  25.         fNewRead          = TRUE;
  26.         _dwCacheStreamLoc = 0;
  27.     }
  28.     BOOL  fSuccess;
  29.     DWORD dwSizeRead = s_dwPageSize;
  30.     if ((fSuccess = ReadUrlCacheEntryStream(_hCacheStream, _dwCacheStreamLoc,
  31.                                             _pbBuff, &dwSizeRead, 0)) && dwSizeRead)
  32.     {
  33.         _fEndOfFile        = (dwSizeRead < s_dwPageSize);
  34.         
  35.         _dwCacheStreamLoc += dwSizeRead;
  36.         _dwBuffSize        = dwSizeRead;
  37.         _pbBuffPos         = _pbBuff;
  38.         _pbBuffLast        = _pbBuff + dwSizeRead;
  39.         _dataType = ASCII_DATA; // default
  40.         if (fNewRead) {
  41.             // deterine data type
  42.             if (_dwBuffSize >= sizeof(USHORT)) {
  43.                 if      (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE)
  44.                     _dataType = UNICODE_DATA;
  45.                 else if (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE_BACKWARDS)
  46.                     _dataType = UNICODE_BACKWARDS_DATA;
  47.                 
  48.                 if (s_IsUnicode(_dataType))
  49.                     _pbBuffPos += s_Charsize(_dataType);
  50.             }
  51.         }
  52.     }
  53.     else {
  54.         fSuccess = FALSE;
  55.         DWORD dwError = GetLastError();
  56.         ASSERT(dwError != ERROR_INSUFFICIENT_BUFFER);
  57.     }
  58.     return fSuccess;
  59. }
  60. CacheSearchEngine::CacheStreamWrapper::CacheStreamWrapper(HANDLE hCacheStream) {
  61.     // this class can be allocated on the stack:
  62.     _pbBuff       = NULL;
  63.     _pbBuffPos    = NULL;
  64.     _pbBuffLast   = NULL;
  65.     _dwBuffSize   = 0;
  66.     _hCacheStream = hCacheStream;
  67.     _fEndOfFile   = FALSE;
  68.     // Read in preliminary block of data --
  69.     //  Die on next read to handle failure
  70.     _fEndOfFile   = !(_ReadNextBlock());
  71. }
  72. CacheSearchEngine::CacheStreamWrapper::~CacheStreamWrapper() {
  73.     if (_pbBuff) {
  74.         //VirtualFree(_pbBuff);
  75.         LocalFree(_pbBuff);;
  76.     }
  77. }
  78. // Read next byte from cache stream, reading in next block if necessary
  79. BOOL CacheSearchEngine::CacheStreamWrapper::_GetNextByte(BYTE &b)
  80. {
  81.     //
  82.     // If the initial read fails _pbBuffPos will be NULL.  Don't
  83.     // allow it to be dereffed.
  84.     //
  85.     BOOL fSuccess = _pbBuffPos ? TRUE : FALSE;
  86.     if (_pbBuffPos == _pbBuffLast)
  87.         fSuccess = _ReadNextBlock();
  88.     if (fSuccess)
  89.         b = *(_pbBuffPos++);
  90.     return fSuccess;
  91. }
  92. BOOL CacheSearchEngine::CacheStreamWrapper::GetNextChar(WCHAR &wc) {
  93.     BOOL fSuccess = TRUE;
  94.     if (s_IsUnicode(_dataType)) {
  95.         BYTE b1, b2;
  96.         LPBYTE bs = (LPBYTE)&wc;
  97.         if (_GetNextByte(b1) && _GetNextByte(b2)) {
  98.             switch (_dataType) {
  99.             case UNICODE_DATA:
  100.                 bs[0] = b1;
  101.                 bs[1] = b2;
  102.                 break;
  103.             case UNICODE_BACKWARDS_DATA:
  104.                 bs[0] = b2;
  105.                 bs[1] = b1;
  106.                 break;
  107.             default: ASSERT(0);
  108.             }
  109.         }
  110.         else
  111.             fSuccess = FALSE;
  112.     }
  113.     else 
  114.     {
  115.        
  116.         BYTE szData[2];
  117.         if (_GetNextByte(szData[0]))
  118.         {
  119.             int cch = 1;
  120.             if (IsDBCSLeadByte(szData[0]))
  121.             {
  122.                 if (!_GetNextByte(szData[1]))
  123.                 {
  124.                     fSuccess = FALSE;
  125.                 }
  126.                 cch++;
  127.             }
  128.             if (fSuccess)
  129.             {
  130.                 fSuccess = (MultiByteToWideChar(CP_ACP, 0, (LPSTR)szData, cch, &wc, 1) > 0);
  131.             }
  132.         }
  133.         else
  134.         {
  135.             fSuccess = FALSE;
  136.         }
  137.     }
  138.     return fSuccess;
  139. }
  140. // Prepare a search target string for searching --
  141. void CacheSearchEngine::StreamSearcher::_PrepareSearchTarget(LPCWSTR pwszSearchTarget)
  142. {
  143.     UINT uStrLen = lstrlenW(pwszSearchTarget);
  144.     _pwszPreparedSearchTarget = ((LPWSTR)LocalAlloc(LPTR, (uStrLen + 1) * sizeof(WCHAR)));
  145.     if (_pwszPreparedSearchTarget) {
  146.         // Strip leading and trailing whitespace and compress adjacent whitespace characters
  147.         //  into literal spaces
  148.         LPWSTR pwszTemp  = _pwszPreparedSearchTarget;
  149.         pwszSearchTarget = s_SkipWhiteSpace(pwszSearchTarget);
  150.         BOOL   fAddWs    = FALSE;
  151.         while(*pwszSearchTarget) {
  152.             if (s_IsWhiteSpace(*pwszSearchTarget)) {
  153.                 fAddWs = TRUE;
  154.                 pwszSearchTarget = s_SkipWhiteSpace(pwszSearchTarget);
  155.             }
  156.             else {
  157.                 if (fAddWs) {
  158.                     *(pwszTemp++) = L' ';
  159.                     fAddWs = FALSE;
  160.                 }
  161.                 *(pwszTemp++) = *(pwszSearchTarget++);
  162.             }
  163.         }
  164.         *pwszTemp = L'';
  165.     }
  166. }
  167. // Search a character stream for a searchtarget
  168. //  Does a simple strstr, but tries to be smart about whitespace and
  169. //  ignores HTML where possible...
  170. BOOL CacheSearchEngine::StreamSearcher::SearchCharStream(CacheSearchEngine::IWideSequentialReadStream &wsrs,
  171.                                                          BOOL fIsHTML/* = FALSE*/)
  172. {
  173.     BOOL fFound = FALSE;
  174.     
  175.     if (_pwszPreparedSearchTarget && *_pwszPreparedSearchTarget)
  176.     {
  177.         WCHAR   wc;
  178.         LPCWSTR pwszCurrent    = _pwszPreparedSearchTarget;
  179.         BOOL    fMatchedWS     = FALSE;
  180. #if 0
  181.         BOOL    fIgnoreHTMLTag = FALSE;
  182. #endif
  183.         
  184.         while(*pwszCurrent && wsrs.GetNextChar(wc)) {
  185. #if 0
  186.             if (fIsHTML && (wc == L'<'))
  187.                 fIgnoreHTMLTag = TRUE;
  188.             else if (fIgnoreHTMLTag) {
  189.                 if (wc == L'>')
  190.                     fIgnoreHTMLTag = FALSE;
  191.             }
  192.             else 
  193. #endif
  194.             if (s_IsWhiteSpace(wc)) {
  195.                 // matched whitespace in search stream, look for
  196.                 //  matching whitespace in target string
  197.                 if (!fMatchedWS) {
  198.                     if (s_IsWhiteSpace(*pwszCurrent)) {
  199.                         fMatchedWS = TRUE;
  200.                         ++pwszCurrent;
  201.                     }
  202.                     else
  203.                         pwszCurrent = _pwszPreparedSearchTarget;
  204.                 }
  205.             }
  206.             else {
  207.                 fMatchedWS = FALSE;
  208.                 if (!ChrCmpIW(*pwszCurrent, wc)) {
  209.                     ++pwszCurrent;
  210.                 }
  211.                 else {
  212.                     pwszCurrent = _pwszPreparedSearchTarget;
  213.                 }
  214.             }
  215.         }
  216.         fFound = !*pwszCurrent;
  217.     }
  218.     return fFound;
  219. }
  220. BOOL CacheSearchEngine::SearchCacheStream(CacheSearchEngine::StreamSearcher &cse, HANDLE hCacheStream,
  221.                                           BOOL fIsHTML/* = FALSE*/)
  222. {
  223.     CacheStreamWrapper csw(hCacheStream);
  224.     return cse.SearchCharStream(csw, fIsHTML);
  225. }