Opera 12.15 Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

VSUtil.cpp 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #include "core/pch.h"
  9. #ifdef VISITED_PAGES_SEARCH
  10. #include "modules/search_engine/VSUtil.h"
  11. #include "modules/search_engine/UniCompressor.h"
  12. OP_STATUS FileWord::Add(float rank, VisitedSearch::RecordHandle h)
  13. {
  14. unsigned idx;
  15. RankRec item(rank, h);
  16. idx = file_ids->Search(item);
  17. if (idx < file_ids->GetCount() && file_ids->Get(idx).h == item.h)
  18. {
  19. item.rank = rank * file_ids->Get(idx).rank;
  20. return file_ids->Replace(idx, item);
  21. }
  22. else
  23. return file_ids->Insert(idx, item);
  24. }
  25. FileWord *FileWord::Create(const uni_char *word, float rank, VisitedSearch::RecordHandle h)
  26. {
  27. FileWord *fw;
  28. int wlen;
  29. if ((fw = OP_NEW(FileWord, ())) == NULL)
  30. return NULL;
  31. wlen = (int)uni_strlen(word) + 1;
  32. if ((fw->word = OP_NEWA(uni_char, wlen)) == NULL)
  33. {
  34. OP_DELETE(fw);
  35. return NULL;
  36. }
  37. op_memcpy(fw->word, word, wlen * 2);
  38. if ((fw->file_ids = OP_NEW(TVector<RankRec>, ())) == NULL)
  39. {
  40. OP_DELETE(fw);
  41. return NULL;
  42. }
  43. if (OpStatus::IsError(fw->file_ids->Add(RankRec(rank, h))))
  44. {
  45. OP_DELETE(fw);
  46. return NULL;
  47. }
  48. fw->index = h->m_index;
  49. return fw;
  50. }
  51. BOOL FileWord::LessThan(const void *left, const void *right)
  52. {
  53. // inlining uni_strcmp saves around 20% of this function
  54. register const uni_char *s1 = (*(FileWord **)left)->word;
  55. register const uni_char *s2 = (*(FileWord **)right)->word;
  56. while (*s1 && *s1 == *s2)
  57. ++s1, ++s2;
  58. return *s1 < *s2;
  59. }
  60. /*****************************************************************************
  61. CacheIterator */
  62. CacheIterator::CacheIterator(TypeDescriptor::ComparePtr sort) : m_results(TypeDescriptor(sizeof(VisitedSearch::Result),
  63. &VisitedSearch::Result::Assign,
  64. sort,
  65. &VisitedSearch::Result::DeleteResult
  66. #ifdef ESTIMATE_MEMORY_USED_AVAILABLE
  67. , &VisitedSearch::Result::EstimateMemoryUsed
  68. #endif
  69. ))
  70. , m_pos(0)
  71. {
  72. }
  73. OP_STATUS CacheIterator::Init(const TVector<FileWord *> &cache, const TVector<uni_char *> *words, BOOL prefix_search)
  74. {
  75. TVector<FileWord::RankRec> handles, prefixes;
  76. VisitedSearch::Result result;
  77. int i, j;
  78. uni_char *w;
  79. uni_char **p_w = &w;
  80. FileWord fw;
  81. int vmin, k, dpos, dlen, prefix_len;
  82. m_pos = 0;
  83. if (cache.GetCount() == 0)
  84. return OpStatus::OK;
  85. if (words->GetCount() == 0)
  86. { // include all documents
  87. RETURN_IF_ERROR(handles.DuplicateOf(*(cache[0]->file_ids)));
  88. for (i = 1; i < (int)cache.GetCount(); ++i)
  89. {
  90. RETURN_IF_ERROR(handles.Unite(*(cache[i]->file_ids)));
  91. }
  92. }
  93. else if (prefix_search && words->GetCount() == 1)
  94. {
  95. w = words->Get(0);
  96. prefix_len = (int)uni_strlen(w);
  97. fw.word = w;
  98. i = cache.Search(&fw);
  99. fw.word = NULL; // Avoid OP_DELETE
  100. while (i < (int)cache.GetCount() && uni_strncmp(w, cache[i]->word, prefix_len) == 0)
  101. {
  102. RETURN_IF_ERROR(handles.Unite(*(cache[i]->file_ids)));
  103. ++i;
  104. }
  105. }
  106. else {
  107. w = words->Get(0);
  108. fw.word = w;
  109. i = cache.Search(&fw);
  110. fw.word = NULL; // Avoid OP_DELETE
  111. if (i >= (int)cache.GetCount() || FileWord::LessThan(&p_w, &(cache[i]))) // not found
  112. return OpStatus::OK;
  113. RETURN_IF_ERROR(handles.DuplicateOf(*(cache[i]->file_ids)));
  114. for (j = 1; (UINT32)j < words->GetCount() - (UINT32)prefix_search && handles.GetCount() > 0; ++j)
  115. {
  116. w = words->Get(j);
  117. fw.word = w;
  118. i = cache.Search(&fw);
  119. fw.word = NULL; // Avoid OP_DELETE
  120. if (i >= (int)cache.GetCount() || FileWord::LessThan(&p_w, &(cache[i]))) // not found
  121. return OpStatus::OK;
  122. // VectorBase::Intersect + modifies the ranking
  123. vmin = 0;
  124. dpos = 0;
  125. dlen = 0;
  126. do {
  127. k = cache[i]->file_ids->Search(handles.Get(dpos + dlen), vmin, cache[i]->file_ids->GetCount());
  128. if (k < (int)cache[i]->file_ids->GetCount() && !(handles.Get(dpos + dlen) < cache[i]->file_ids->Get(k)))
  129. {
  130. handles[dpos + dlen].rank *= cache[i]->file_ids->Get(k).rank;
  131. vmin = k;
  132. if (dlen > 0)
  133. handles.Delete(dpos, dlen);
  134. else
  135. ++dpos;
  136. dlen = 0;
  137. }
  138. else
  139. ++dlen;
  140. } while (dpos + dlen < (int)handles.GetCount());
  141. if (dlen > 0)
  142. handles.Delete(dpos, dlen);
  143. }
  144. if (handles.GetCount() == 0)
  145. return OpStatus::OK;
  146. if (prefix_search)
  147. {
  148. w = words->Get(words->GetCount() - 1);
  149. prefix_len = (int)uni_strlen(w);
  150. fw.word = w;
  151. i = cache.Search(&fw);
  152. fw.word = NULL; // Avoid OP_DELETE
  153. if (i >= (int)cache.GetCount() || uni_strncmp(w, cache[i]->word, prefix_len) != 0) // not found
  154. return OpStatus::OK;
  155. do {
  156. RETURN_IF_ERROR(prefixes.Unite(*(cache[i]->file_ids)));
  157. ++i;
  158. } while (i < (int)cache.GetCount() && uni_strncmp(w, cache[i]->word, prefix_len) == 0);
  159. // VectorBase::Intersect + modifies the ranking
  160. vmin = 0;
  161. dpos = 0;
  162. dlen = 0;
  163. do {
  164. k = prefixes.Search(handles.Get(dpos + dlen), vmin, prefixes.GetCount());
  165. if (k < (int)prefixes.GetCount() && !(handles.Get(dpos + dlen) < prefixes.Get(k)))
  166. {
  167. handles[dpos + dlen].rank *= prefixes.Get(k).rank;
  168. vmin = k;
  169. if (dlen > 0)
  170. handles.Delete(dpos, dlen);
  171. else
  172. ++dpos;
  173. dlen = 0;
  174. }
  175. else
  176. ++dlen;
  177. } while (dpos + dlen < (int)handles.GetCount());
  178. if (dlen > 0)
  179. handles.Delete(dpos, dlen);
  180. }
  181. }
  182. RETURN_IF_ERROR(m_results.Reserve(handles.GetCount()));
  183. for (i = handles.GetCount() - 1; i >= 0; --i)
  184. {
  185. RETURN_IF_ERROR(Handle2Result(result, handles[i].h));
  186. result.ranking = handles[i].rank;
  187. RETURN_IF_ERROR(m_results.Add(result));
  188. VisitedSearch::Result::DeleteResult(&result);
  189. }
  190. return m_results.Sort();
  191. }
  192. OP_STATUS CacheIterator::Handle2Result(VisitedSearch::Result &result, VisitedSearch::RecordHandle handle)
  193. {
  194. UniCompressor uc;
  195. if (handle->GetField("url").CopyStringValue(&result.url) == NULL)
  196. return OpStatus::ERR_NO_MEMORY;
  197. if (handle->GetField("title").CopyStringValue(&result.title) == NULL)
  198. {
  199. VisitedSearch::Result::DeleteResult(&result);
  200. return OpStatus::ERR_NO_MEMORY;
  201. }
  202. if ((result.thumbnail_size = handle->GetField("thumbnail").GetSize()) > 0)
  203. {
  204. if ((result.thumbnail = OP_NEWA(unsigned char, result.thumbnail_size)) == NULL)
  205. {
  206. VisitedSearch::Result::DeleteResult(&result);
  207. return OpStatus::ERR_NO_MEMORY;
  208. }
  209. handle->GetField("thumbnail").GetValue(result.thumbnail, result.thumbnail_size);
  210. }
  211. OP_STATUS status = result.SetCompressedPlaintext((const unsigned char *)handle->GetField("plaintext").GetAddress(), handle->GetField("plaintext").GetSize());
  212. if (OpStatus::IsError(status))
  213. {
  214. VisitedSearch::Result::DeleteResult(&result);
  215. return status;
  216. }
  217. handle->GetField("visited").GetValue(&result.visited);
  218. result.ranking = 0.0F;
  219. return OpStatus::OK;
  220. }
  221. #endif // VISITED_PAGES_SEARCH