Opera 12.15 Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

VSIterator.cpp 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938
  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #include "core/pch.h"
  9. #ifdef VISITED_PAGES_SEARCH
  10. #include "modules/search_engine/VisitedSearch.h"
  11. #include "modules/search_engine/VSIterator.h"
  12. #include "modules/search_engine/VSUtil.h"
  13. #include "modules/search_engine/UniCompressor.h"
  14. #include "modules/hardcore/opera/opera.h"
  15. /*****************************************************************************
  16. AllDocIterator */
  17. OP_STATUS AllDocIterator::Init(void)
  18. {
  19. Next();
  20. if (m_result.GetCount() > 0)
  21. m_result_pos = 0;
  22. return m_status;
  23. }
  24. BOOL AllDocIterator::Next(void)
  25. {
  26. SearchIterator<IdTime> *it;
  27. VisitedSearch::Result row;
  28. m_status = OpStatus::OK;
  29. if ((unsigned)(m_result_pos + 1) < m_result.GetCount())
  30. {
  31. ++m_result_pos;
  32. return TRUE;
  33. }
  34. if (m_result.GetCount() == 0)
  35. it = m_index->m_alldoc.SearchFirst();
  36. else
  37. it = m_index->m_alldoc.Search(IdTime(m_result[m_result.GetCount() - 1].visited, m_result[m_result.GetCount() - 1].id), operatorGT);
  38. if (it == NULL)
  39. {
  40. OP_ASSERT(0); // this was more probably a search error than OOM
  41. m_status = OpStatus::ERR_NO_MEMORY;
  42. return FALSE;
  43. }
  44. if (it->End())
  45. {
  46. if ((unsigned)m_result_pos < m_result.GetCount())
  47. ++m_result_pos;
  48. OP_DELETE(it);
  49. return FALSE;
  50. }
  51. if (m_result.GetSize() == m_result.GetCount())
  52. {
  53. if (OpStatus::IsError(m_result.Reserve(m_result.GetSize() + 20)))
  54. {
  55. OP_DELETE(it);
  56. return FALSE;
  57. }
  58. }
  59. do {
  60. row.id = it->Get().data[IDTIME_ID];
  61. row.ranking = 1.0;
  62. OP_ASSERT(row.id != 0);
  63. if (OpStatus::IsError(m_status = VisitedSearch::Result::ReadResult(row, &(m_index->m_metadata))))
  64. {
  65. OP_DELETE(it);
  66. return FALSE;
  67. }
  68. if (row.invalid || row.next != 0)
  69. VisitedSearch::Result::DeleteResult(&row);
  70. else
  71. if (OpStatus::IsError(m_status = m_result.Add(row)))
  72. {
  73. OP_DELETE(it);
  74. return FALSE;
  75. }
  76. } while (m_result.GetSize() > m_result.GetCount() && it->Next());
  77. if (OpStatus::IsError(m_status = it->Error()))
  78. {
  79. OP_DELETE(it);
  80. return FALSE;
  81. }
  82. OP_DELETE(it);
  83. ++m_result_pos;
  84. if (m_result_pos == 0)
  85. {
  86. if (m_result.GetCount() <= 1)
  87. return FALSE;
  88. ++m_result_pos;
  89. }
  90. return m_result_pos < (int)m_result.GetCount();
  91. }
  92. BOOL AllDocIterator::Prev(void)
  93. {
  94. if (m_result_pos <= 0)
  95. {
  96. if (m_result_pos == 0)
  97. --m_result_pos;
  98. return FALSE;
  99. }
  100. --m_result_pos;
  101. return TRUE;
  102. }
  103. /*****************************************************************************
  104. RankIterator */
  105. OP_STATUS RankIterator::AddWord(UINT32 pos)
  106. {
  107. OpFileLength file_pos;
  108. int size;
  109. TVector<RankId> *word_rank;
  110. TVector<RankId> *word_id;
  111. OP_STATUS err;
  112. if (pos == 0)
  113. return OpStatus::ERR_NULL_POINTER;
  114. RETURN_IF_ERROR(m_rank_vec.Reserve(m_rank_vec.GetCount() + 1));
  115. RETURN_IF_ERROR(m_id_vec.Reserve(m_id_vec.GetCount() + 1));
  116. if ((word_rank = OP_NEW(TVector<RankId>, (&RankId::CompareRank))) == NULL)
  117. return OpStatus::ERR_NO_MEMORY;
  118. if ((word_id = OP_NEW(TVector<RankId>, (&RankId::CompareId))) == NULL)
  119. {
  120. OP_DELETE(word_rank);
  121. return OpStatus::ERR_NO_MEMORY;
  122. }
  123. file_pos = ((OpFileLength)pos) * m_index->m_wordbag.GetBlockSize();
  124. size = m_index->m_wordbag.DataLength(file_pos) / sizeof(RankId);
  125. if (size == 0)
  126. {
  127. if (file_pos >= m_index->m_wordbag.GetFileSize())
  128. err = OpStatus::ERR_OUT_OF_RANGE;
  129. else {
  130. OP_ASSERT(0); // empty vector for this word? impossible! really.
  131. err = OpStatus::ERR;
  132. }
  133. goto cleanup;
  134. }
  135. if (OpStatus::IsError(err = word_rank->SetCount(size)))
  136. goto cleanup;
  137. if (OpStatus::IsError(err = word_id->Reserve(size)))
  138. goto cleanup;
  139. if (!m_index->m_wordbag.ReadApnd(word_rank->Ptr(), size * sizeof(RankId), file_pos))
  140. { // read error from disk?
  141. err = OpStatus::ERR;
  142. goto cleanup;
  143. }
  144. if (OpStatus::IsError(err = word_id->DuplicateOf(*word_rank)))
  145. goto cleanup;
  146. if (OpStatus::IsError(err = word_rank->Sort()))
  147. goto cleanup;
  148. if (OpStatus::IsError(err = word_id->Sort()))
  149. goto cleanup;
  150. if (OpStatus::IsError(err = m_rank_vec.Add(word_rank)))
  151. goto cleanup;
  152. if (OpStatus::IsError(err = m_id_vec.Add(word_id)))
  153. {
  154. m_rank_vec.RemoveByItem(word_rank);
  155. goto cleanup;
  156. }
  157. if (m_max_line > (unsigned)size)
  158. m_max_line = size;
  159. return OpStatus::OK;
  160. cleanup:
  161. OP_DELETE(word_id);
  162. OP_DELETE(word_rank);
  163. return err;
  164. }
  165. BOOL RankIterator::Next(void)
  166. {
  167. int i, j;
  168. unsigned idx;
  169. float rank;
  170. VisitedSearch::Result res_val;
  171. m_status = OpStatus::OK;
  172. // no words
  173. if (m_rank_vec.GetCount() == 0)
  174. return AllDocIterator::Next();
  175. if (m_line >= m_max_line)
  176. {
  177. ++m_result_pos;
  178. return (unsigned)m_result_pos < m_result.GetCount();
  179. }
  180. if ((unsigned)(m_result_pos + 1) < m_result.GetCount())
  181. {
  182. rank = 0.0;
  183. for (j = m_rank_vec.GetCount() - 1; j >= 0 ; --j)
  184. rank += m_rank_vec[j]->Get(m_line).rank;
  185. rank /= m_rank_vec.GetCount();
  186. if (rank > m_result[m_result_pos + 1].ranking) // cannot get better results
  187. {
  188. ++m_result_pos;
  189. return TRUE;
  190. }
  191. }
  192. idx = m_result.GetCount() + 100 + m_rank_vec.GetCount();
  193. RETURN_VALUE_IF_ERROR(m_status = m_result.Reserve(idx <= m_max_line * m_rank_vec.GetCount() ? idx : m_max_line * m_rank_vec.GetCount()), FALSE);
  194. while (m_line < m_max_line)
  195. {
  196. for (i = m_rank_vec.GetCount() - 1; i >= 0; --i)
  197. {
  198. rank = m_rank_vec[i]->Get(m_line).rank;
  199. for (j = m_id_vec.GetCount() - 1; j >= 0 ; --j)
  200. {
  201. if (j == i)
  202. continue;
  203. idx = m_id_vec[j]->Search(m_rank_vec[i]->Get(m_line));
  204. if (idx >= m_id_vec[j]->GetCount() || m_id_vec[j]->Get(idx).data[RANKID_ID] != m_rank_vec[i]->Get(m_line).data[RANKID_ID])
  205. break;
  206. rank += m_id_vec[j]->Get(idx).rank;
  207. }
  208. if (j >= 0)
  209. continue;
  210. rank /= m_rank_vec.GetCount();
  211. res_val.id = m_rank_vec[i]->Get(m_line).data[RANKID_ID];
  212. res_val.ranking = rank;
  213. idx = m_result.Search(res_val);
  214. if (idx < m_result.GetCount() &&
  215. (m_result[idx].id == m_rank_vec[i]->Get(m_line).data[RANKID_ID] ||
  216. (idx > 0 && m_result[idx - 1].id == m_rank_vec[i]->Get(m_line).data[RANKID_ID]) || // can be one off due to rounding of the float number
  217. (idx < m_result.GetCount() - 1 && m_result[idx + 1].id == m_rank_vec[i]->Get(m_line).data[RANKID_ID])))
  218. continue;
  219. RETURN_VALUE_IF_ERROR(m_status = VisitedSearch::Result::ReadResult(res_val, &(m_index->m_metadata)), FALSE);
  220. if (res_val.invalid || res_val.next != 0)
  221. VisitedSearch::Result::DeleteResult(&res_val);
  222. else
  223. RETURN_VALUE_IF_ERROR(m_result.Insert(idx, res_val), FALSE);
  224. }
  225. ++m_line;
  226. if (m_result.GetCount() > (unsigned)(m_result_pos + 1) && m_line < m_max_line)
  227. {
  228. rank = 0.0;
  229. for (j = m_rank_vec.GetCount() - 1; j >= 0 ; --j)
  230. rank += m_rank_vec[j]->Get(m_line).rank;
  231. rank /= m_rank_vec.GetCount();
  232. if (rank > m_result[m_result_pos + 1].ranking) // cannot get better results
  233. break;
  234. }
  235. if (m_result.GetCount() >= m_result.GetSize() - m_rank_vec.GetCount()) // allways keep enough place for a complete line
  236. {
  237. idx = m_result.GetCount() + 100 + m_rank_vec.GetCount();
  238. RETURN_VALUE_IF_ERROR(m_status = m_result.Reserve(idx <= m_max_line * m_rank_vec.GetCount() ? idx : m_max_line * m_rank_vec.GetCount()), FALSE);
  239. }
  240. }
  241. ++m_result_pos;
  242. return m_line < m_max_line || (unsigned)m_result_pos < m_result.GetCount();
  243. }
  244. /*****************************************************************************
  245. TimeIterator */
  246. OP_STATUS TimeIterator::AddWord(UINT32 pos)
  247. {
  248. OpFileLength file_pos;
  249. int size;
  250. TVector<RankId> *word_id;
  251. OP_STATUS err;
  252. if (pos == 0)
  253. return OpStatus::ERR_NULL_POINTER;
  254. RETURN_IF_ERROR(m_id_vec.Reserve(m_id_vec.GetCount() + 1));
  255. if ((word_id = OP_NEW(TVector<RankId>, (&RankId::CompareId))) == NULL)
  256. return OpStatus::ERR_NO_MEMORY;
  257. file_pos = ((OpFileLength)pos) * m_index->m_wordbag.GetBlockSize();
  258. size = m_index->m_wordbag.DataLength(file_pos) / sizeof(RankId);
  259. if (size == 0)
  260. {
  261. OP_DELETE(word_id);
  262. if (file_pos >= m_index->m_wordbag.GetFileSize())
  263. return OpStatus::ERR_OUT_OF_RANGE;
  264. else {
  265. OP_ASSERT(0); // empty vector for this word? impossible! really.
  266. return OpStatus::ERR;
  267. }
  268. }
  269. if (OpStatus::IsError(err = word_id->SetCount(size)))
  270. {
  271. OP_DELETE(word_id);
  272. return err;
  273. }
  274. if (!m_index->m_wordbag.ReadApnd(word_id->Ptr(), size * sizeof(RankId), file_pos))
  275. { // read error from disk?
  276. OP_DELETE(word_id);
  277. return OpStatus::ERR;
  278. }
  279. if (OpStatus::IsError(err = word_id->Sort()) ||
  280. OpStatus::IsError(err = m_id_vec.Add(word_id)))
  281. {
  282. OP_DELETE(word_id);
  283. return err;
  284. }
  285. return OpStatus::OK;
  286. }
  287. BOOL TimeIterator::Next(void)
  288. {
  289. int j;
  290. unsigned idx;
  291. float rank;
  292. SearchIterator<IdTime> *it;
  293. VisitedSearch::Result row;
  294. RankId tmp_srch;
  295. m_status = OpStatus::OK;
  296. // no words
  297. if (m_id_vec.GetCount() == 0)
  298. return AllDocIterator::Next();
  299. if ((unsigned)(m_result_pos + 1) < m_result.GetCount())
  300. {
  301. ++m_result_pos;
  302. return TRUE;
  303. }
  304. if (m_result_pos != -1 && (unsigned)m_result_pos >= m_result.GetCount()) // end already reached
  305. return FALSE;
  306. if (m_result.GetCount() == 0)
  307. it = m_index->m_alldoc.SearchFirst();
  308. else
  309. it = m_index->m_alldoc.Search(IdTime(m_result[m_result.GetCount() - 1].visited, m_result[m_result.GetCount() - 1].id), operatorGT);
  310. if (it == NULL)
  311. {
  312. OP_ASSERT(0); // more probably a search error
  313. m_status = OpStatus::ERR_NO_MEMORY;
  314. return FALSE;
  315. }
  316. if (it->End())
  317. {
  318. if ((unsigned)m_result_pos < m_result.GetCount())
  319. ++m_result_pos;
  320. OP_DELETE(it);
  321. return FALSE;
  322. }
  323. if (m_result.GetSize() == m_result.GetCount())
  324. {
  325. if (OpStatus::IsError(m_result.Reserve(m_result.GetSize() + 20)))
  326. {
  327. OP_DELETE(it);
  328. return FALSE;
  329. }
  330. }
  331. do {
  332. rank = 0.0;
  333. tmp_srch.data[RANKID_ID] = it->Get().data[RANKID_ID];
  334. for (j = m_id_vec.GetCount() - 1; j >= 0 ; --j)
  335. {
  336. idx = m_id_vec[j]->Search(tmp_srch);
  337. if (idx >= m_id_vec[j]->GetCount() || m_id_vec[j]->Get(idx).data[RANKID_ID] != tmp_srch.data[RANKID_ID])
  338. break;
  339. rank += m_id_vec[j]->Get(idx).rank;
  340. }
  341. if (j >= 0)
  342. continue;
  343. row.id = it->Get().data[IDTIME_ID];
  344. row.ranking = rank / m_id_vec.GetCount();
  345. if (OpStatus::IsError(m_status = VisitedSearch::Result::ReadResult(row, &(m_index->m_metadata))))
  346. {
  347. OP_DELETE(it);
  348. return FALSE;
  349. }
  350. if (row.invalid || row.next != 0)
  351. VisitedSearch::Result::DeleteResult(&row);
  352. else if (OpStatus::IsError(m_status = m_result.Add(row)))
  353. {
  354. VisitedSearch::Result::DeleteResult(&row);
  355. OP_DELETE(it);
  356. return FALSE;
  357. }
  358. } while (m_result.GetSize() > m_result.GetCount() && it->Next());
  359. if (OpStatus::IsError(m_status = it->Error()))
  360. {
  361. OP_DELETE(it);
  362. return FALSE;
  363. }
  364. OP_DELETE(it);
  365. ++m_result_pos;
  366. if (m_result_pos == 0)
  367. {
  368. if (m_result.GetCount() <= 1)
  369. return FALSE;
  370. ++m_result_pos;
  371. }
  372. return m_result_pos < (int)m_result.GetCount();
  373. }
  374. /*****************************************************************************
  375. MultiOrIterator */
  376. BOOL MultiOrIterator::Next(void)
  377. {
  378. int i, j, m;
  379. BOOL end;
  380. i = 0;
  381. while (i < (int)subindex.GetCount() && subindex[i]->End())
  382. {
  383. RETURN_VALUE_IF_ERROR(subindex[i]->Error(), FALSE);
  384. ++i;
  385. }
  386. if (i >= (int)subindex.GetCount() || OpStatus::IsError(subindex[i]->Error()))
  387. return FALSE;
  388. end = FALSE;
  389. m = i;
  390. for (j = subindex.GetCount() - 1; j > m; --j)
  391. {
  392. RETURN_VALUE_IF_ERROR(subindex[j]->Error(), FALSE);
  393. if (!subindex[j]->End())
  394. {
  395. end = TRUE;
  396. if (Compare(&subindex[j]->Get(), &subindex[i]->Get()))
  397. i = j;
  398. }
  399. }
  400. return subindex[i]->Next() || end;
  401. }
  402. BOOL MultiOrIterator::Prev(void)
  403. {
  404. int i, j, m;
  405. BOOL beginning;
  406. i = 0;
  407. while (i < (int)subindex.GetCount() && subindex[i]->End())
  408. {
  409. RETURN_VALUE_IF_ERROR(subindex[i]->Error(), FALSE);
  410. ++i;
  411. }
  412. if (i >= (int)subindex.GetCount() || OpStatus::IsError(subindex[i]->Error()))
  413. return FALSE;
  414. beginning = FALSE;
  415. m = i;
  416. for (j = subindex.GetCount() - 1; j > m; --j)
  417. {
  418. RETURN_VALUE_IF_ERROR(subindex[j]->Error(), FALSE);
  419. if (!subindex[j]->End())
  420. {
  421. beginning = TRUE;
  422. if (Compare(&subindex[i]->Get(), &subindex[j]->Get()))
  423. i = j;
  424. }
  425. }
  426. return subindex[i]->Prev() || beginning;
  427. }
  428. const VisitedSearch::Result &MultiOrIterator::Get(void)
  429. {
  430. int i, j, m;
  431. i = 0;
  432. while (i < (int)subindex.GetCount() && subindex[i]->End())
  433. {
  434. OP_ASSERT(OpStatus::IsSuccess(subindex[i]->Error()));
  435. ++i;
  436. }
  437. if (i >= (int)subindex.GetCount()) {
  438. OP_ASSERT(0);
  439. return *(VisitedSearch::Result*)g_opera->search_engine_module.empty_visited_search_result;
  440. }
  441. OP_ASSERT(OpStatus::IsSuccess(subindex[i]->Error()));
  442. m = i;
  443. for (j = subindex.GetCount() - 1; j > m; --j)
  444. {
  445. OP_ASSERT(OpStatus::IsSuccess(subindex[j]->Error()));
  446. if (!subindex[j]->End())
  447. if (Compare(&subindex[j]->Get(), &subindex[i]->Get()))
  448. i = j;
  449. }
  450. return subindex[i]->Get();
  451. }
  452. OP_STATUS MultiOrIterator::Error(void) const
  453. {
  454. register int i;
  455. for (i = subindex.GetCount() - 1; i >= 0; --i)
  456. {
  457. RETURN_IF_ERROR(subindex[i]->Error());
  458. }
  459. return OpStatus::OK;
  460. }
  461. int MultiOrIterator::Count(void) const
  462. {
  463. int count, c, i;
  464. count = 0;
  465. for (i = subindex.GetCount() - 1; i >= 0; --i)
  466. {
  467. c = subindex[i]->Count();
  468. if (c == -1)
  469. return c;
  470. count += c;
  471. }
  472. return count;
  473. }
  474. BOOL MultiOrIterator::End(void) const
  475. {
  476. int i;
  477. for (i = subindex.GetCount() - 1; i >= 0; --i)
  478. if (!subindex[i]->End())
  479. return FALSE;
  480. return TRUE;
  481. }
  482. BOOL MultiOrIterator::Beginning(void) const
  483. {
  484. int i;
  485. for (i = subindex.GetCount() - 1; i >= 0; --i)
  486. if (!subindex[i]->Beginning())
  487. return FALSE;
  488. return TRUE;
  489. }
  490. /*****************************************************************************
  491. FastPrefixIterator */
  492. FastPrefixIterator::~FastPrefixIterator(void)
  493. {
  494. OP_DELETE(m_prefixes);
  495. }
  496. OP_STATUS FastPrefixIterator::AddWord(UINT32 u32_pos)
  497. {
  498. OpAutoPtr< TVector<RankId> > wv;
  499. OpFileLength pos;
  500. pos = ((OpFileLength)u32_pos) * m_index->m_wordbag.GetBlockSize();
  501. wv.reset(OP_NEW(TVector<RankId>, (&RankId::CompareId)));
  502. if (wv.get() == NULL)
  503. return OpStatus::ERR_NO_MEMORY;
  504. RETURN_IF_ERROR(wv->Reserve(m_index->m_wordbag.DataLength(pos) / sizeof(RankId)));
  505. RETURN_IF_ERROR(wv->SetCount(wv->GetSize()));
  506. if (!m_index->m_wordbag.ReadApnd(wv->Ptr(), wv->GetCount() * sizeof(RankId), pos))
  507. return OpStatus::ERR;
  508. RETURN_IF_ERROR(wv->Sort());
  509. return m_full_words.Add(wv.release());
  510. }
  511. void FastPrefixIterator::SetPrefix(SearchIterator<ACT::PrefixResult> *prefix_iterator)
  512. {
  513. OP_ASSERT(m_prefixes == NULL);
  514. m_prefixes = prefix_iterator;
  515. m_status = OpStatus::OK;
  516. }
  517. BOOL FastPrefixIterator::Next(void)
  518. {
  519. int i, j, pos, rjct_pos;
  520. unsigned read_count;
  521. if (m_prefix_data_size < 0)
  522. {
  523. if (OpStatus::IsError(m_prefixes->Error()) || m_prefixes->End())
  524. {
  525. if (m_result_pos < (int)m_result.GetCount())
  526. m_result_pos = m_result.GetCount();
  527. return FALSE;
  528. }
  529. }
  530. ++m_result_pos;
  531. read_count = m_current_prefix.GetCount();
  532. while (m_result_pos >= (int)m_result.GetCount())
  533. {
  534. if ((int)m_current_prefix.GetCount() >= m_prefix_data_size)
  535. {
  536. if (m_prefix_data_size >= 0 && !m_prefixes->Next())
  537. return FALSE;
  538. if ((m_prefix_data_size = m_index->m_wordbag.DataLength(((OpFileLength)m_prefixes->Get().id) * m_index->m_wordbag.GetBlockSize()) / sizeof(RankId)) == 0)
  539. {
  540. OP_ASSERT(0); // no data for this word id?
  541. m_current_prefix.Clear();
  542. continue;
  543. }
  544. RETURN_VALUE_IF_ERROR(m_status = m_current_prefix.Reserve(16), FALSE);
  545. read_count = m_prefix_data_size > 16 ? 16 : m_prefix_data_size;
  546. RETURN_VALUE_IF_ERROR(m_status = m_current_prefix.SetCount(read_count), FALSE);
  547. if (!m_index->m_wordbag.ReadApnd(m_current_prefix.Ptr(), m_current_prefix.GetCount() * sizeof(RankId),
  548. ((OpFileLength)m_prefixes->Get().id) * m_index->m_wordbag.GetBlockSize()))
  549. {
  550. m_status = OpStatus::ERR;
  551. return FALSE;
  552. }
  553. }
  554. else {
  555. read_count = m_prefix_data_size > (int)(read_count * 2) ? read_count * 2 : m_prefix_data_size;
  556. RETURN_VALUE_IF_ERROR(m_status = m_current_prefix.Reserve(read_count), FALSE);
  557. RETURN_VALUE_IF_ERROR(m_status = m_current_prefix.SetCount(read_count), FALSE);
  558. if (!m_index->m_wordbag.ReadApnd(m_current_prefix.Ptr(), m_current_prefix.GetCount() * sizeof(RankId),
  559. ((OpFileLength)m_prefixes->Get().id) * m_index->m_wordbag.GetBlockSize()))
  560. {
  561. m_status = OpStatus::ERR;
  562. return FALSE;
  563. }
  564. }
  565. if ((int)m_current_prefix.GetCount() == m_prefix_data_size)
  566. {
  567. RETURN_VALUE_IF_ERROR(m_status = m_current_prefix.Sort(), FALSE);
  568. // count can change if there was a PreFlush while a document was indexed
  569. m_prefix_data_size = m_current_prefix.GetCount();
  570. }
  571. else
  572. RETURN_VALUE_IF_ERROR(m_status = m_current_prefix.Sort(), FALSE);
  573. RETURN_VALUE_IF_ERROR(m_status = m_rejects.Reserve(m_rejects.GetCount() + m_current_prefix.GetCount()), FALSE);
  574. for (i = 0; i < (int)m_current_prefix.GetCount(); ++i)
  575. {
  576. VisitedSearch::Result res;
  577. for (j = m_full_words.GetCount() - 1; j >= 0; --j)
  578. {
  579. if ((pos = m_full_words[j]->Search(m_current_prefix[i])) >= (int)m_full_words[j]->GetCount())
  580. break;
  581. if (m_full_words[j]->Get(pos).data[RANKID_ID] != m_current_prefix[i].data[RANKID_ID])
  582. break;
  583. }
  584. if (j >= 0)
  585. continue;
  586. res.id = m_current_prefix[i].data[RANKID_ID];
  587. rjct_pos = m_rejects.Search(res.id);
  588. if (rjct_pos < (int)m_rejects.GetCount() && m_rejects[rjct_pos] == res.id)
  589. continue;
  590. if (m_result.Find(res) != -1)
  591. continue;
  592. RETURN_VALUE_IF_ERROR(m_status = VisitedSearch::Result::ReadResult(res, &(m_index->m_metadata)), FALSE);
  593. if (res.invalid || res.next != 0)
  594. {
  595. if (m_rejects.GetCount() < m_rejects.GetSize())
  596. m_status = m_rejects.Insert(rjct_pos, res.id);
  597. VisitedSearch::Result::DeleteResult(&res);
  598. RETURN_VALUE_IF_ERROR(m_status, FALSE);
  599. continue;
  600. }
  601. m_status = m_result.Add(res);
  602. VisitedSearch::Result::DeleteResult(&res);
  603. RETURN_VALUE_IF_ERROR(m_status, FALSE);
  604. }
  605. }
  606. return TRUE;
  607. }
  608. BOOL FastPrefixIterator::Prev(void)
  609. {
  610. if (m_result_pos <= 0)
  611. return FALSE;
  612. --m_result_pos;
  613. return TRUE;
  614. }
  615. OP_STATUS FastPrefixIterator::Error(void) const
  616. {
  617. RETURN_IF_ERROR(m_prefixes->Error());
  618. return m_status;
  619. }
  620. int FastPrefixIterator::Count(void) const
  621. {
  622. if (m_prefixes->End() && m_result.GetSize() == 0)
  623. return 0;
  624. return -1;
  625. }
  626. BOOL FastPrefixIterator::End(void) const
  627. {
  628. return m_prefixes->End() && (int)m_current_prefix.GetCount() >= m_prefix_data_size && m_result_pos >= (int)m_result.GetCount();
  629. }
  630. BOOL FastPrefixIterator::Beginning(void) const
  631. {
  632. return m_result_pos == 0;
  633. }
  634. const VisitedSearch::Result &FastPrefixIterator::Get(void)
  635. {
  636. return m_result.Get(m_result_pos);
  637. }
  638. /*****************************************************************************
  639. ChainIterator */
  640. // @param cache_iterator NULL or non-empty iterator
  641. OP_STATUS ChainIterator::Init(TVector<uni_char *> &words, const TVector<RankIndex *> &indexes, SearchIterator<VisitedSearch::Result> *cache_iterator, BOOL prefix_search)
  642. {
  643. int i, j;
  644. FastPrefixIterator *it;
  645. SearchIterator<ACT::PrefixResult> *prefix_it;
  646. ACT::WordID wid;
  647. m_words.TakeOver(words);
  648. if (cache_iterator != NULL)
  649. RETURN_IF_ERROR(m_chain.Add(cache_iterator));
  650. for (i = indexes.GetCount() - 1; i >= 0; --i)
  651. {
  652. if ((it = OP_NEW(FastPrefixIterator, (indexes[i]))) == NULL)
  653. return OpStatus::ERR_NO_MEMORY;
  654. if (OpStatus::IsError(m_chain.Add(it)))
  655. {
  656. OP_DELETE(it);
  657. return OpStatus::ERR_NO_MEMORY;
  658. }
  659. for (j = 0; j < (int)m_words.GetCount() - 1; ++j)
  660. {
  661. if ((wid = indexes[i]->m_act.Search(m_words[j])) == 0)
  662. break;
  663. RETURN_IF_ERROR(it->AddWord(wid));
  664. }
  665. if (j != (int)m_words.GetCount() - 1)
  666. {
  667. m_chain.Delete(m_chain.GetCount() - 1);
  668. continue;
  669. }
  670. if ((prefix_it = indexes[i]->m_act.PrefixSearch(m_words[j], !prefix_search)) == NULL)
  671. return OpStatus::ERR_NO_MEMORY;
  672. it->SetPrefix(prefix_it);
  673. }
  674. if (cache_iterator == NULL)
  675. {
  676. m_current_iterator = -1;
  677. do {
  678. if (++m_current_iterator >= (int)m_chain.GetCount())
  679. break;
  680. RETURN_IF_ERROR(((FastPrefixIterator *)m_chain[m_current_iterator])->Init());
  681. } while (m_chain[m_current_iterator]->End());
  682. }
  683. return OpStatus::OK;
  684. }
  685. BOOL ChainIterator::Next(void)
  686. {
  687. while (m_current_iterator < (int)m_chain.GetCount() && !m_chain[m_current_iterator]->Next())
  688. ++m_current_iterator;
  689. return m_current_iterator < (int)m_chain.GetCount();
  690. }
  691. BOOL ChainIterator::Prev(void)
  692. {
  693. while (m_current_iterator >= 0 && !m_chain[m_current_iterator]->Prev())
  694. --m_current_iterator;
  695. return m_current_iterator >= 0;
  696. }
  697. OP_STATUS ChainIterator::Error(void) const
  698. {
  699. if (m_current_iterator < (int)m_chain.GetCount())
  700. return m_chain[m_current_iterator]->Error();
  701. return OpStatus::OK;
  702. }
  703. int ChainIterator::Count(void) const
  704. {
  705. int i, empty;
  706. if (m_chain.GetCount() == 0)
  707. return 0;
  708. empty = 0;
  709. for (i = m_chain.GetCount() - 1; i >= 0; --i)
  710. empty += !m_chain[i]->Empty();
  711. if (empty == 0)
  712. return FALSE;
  713. return -1;
  714. }
  715. BOOL ChainIterator::End(void) const
  716. {
  717. return m_current_iterator >= (int)m_chain.GetCount();
  718. }
  719. BOOL ChainIterator::Beginning(void) const
  720. {
  721. return m_current_iterator < 0;
  722. }
  723. const VisitedSearch::Result &ChainIterator::Get(void)
  724. {
  725. return m_chain[m_current_iterator]->Get();
  726. }
  727. #endif // VISITED_PAGES_SEARCH