Opera 12.15 Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ACTUtil.cpp 31KB


  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #include "core/pch.h"
  9. #ifdef SEARCH_ENGINE // to remove compilation errors with ADVANCED_OPVECTOR
  10. #include "modules/search_engine/ACTUtil.h"
  11. #include "modules/search_engine/ACT.h"
  12. //#define HTML_EXPORT
  13. #ifdef HTML_EXPORT
  14. #include "modules/search_engine/tests/HTMLExporter.h"
  15. #endif
  16. #ifdef _DEBUG
  17. #define CHECK_INTEGRITY(branch, act) OP_ASSERT(TrieBranch::CheckIntegrity(branch, act))
  18. #else
  19. #define CHECK_INTEGRITY(branch, act)
  20. #endif
  21. OP_STATUS NodePointer::Reset(BSCache::Item::DiskId block_no)
  22. {
  23. BSCache::Item *bi;
  24. offset = 0;
  25. parent = 0;
  26. if (branch != NULL && branch->disk_id == block_no)
  27. return OpStatus::OK;
  28. if (branch != NULL)
  29. act->Release(branch);
  30. if (block_no == 0)
  31. {
  32. branch = NULL;
  33. return OpStatus::OK;
  34. }
  35. RETURN_IF_ERROR(act->Load(&bi, (OpFileLength)(block_no)));
  36. branch = (TrieBranch *)bi;
  37. return OpStatus::OK;
  38. }
  39. void NodePointer::Reset(TrieBranch *b)
  40. {
  41. BSCache::Item *bi;
  42. if (branch == b)
  43. return;
  44. offset = 0;
  45. parent = 0;
  46. if (branch != NULL)
  47. act->Release(branch);
  48. if (b == NULL)
  49. {
  50. branch = NULL;
  51. return;
  52. }
  53. act->Load(&bi, b);
  54. branch = (TrieBranch *)bi;
  55. }
  56. BOOL NodePointer::ValidChar(char position)
  57. {
  58. register int o;
  59. OP_ASSERT(!IsFinal());
  60. if (IsFinal())
  61. return FALSE;
  62. if (HasChild())
  63. return TRUE;
  64. o = GetOffset() + (unsigned char)position - FIRST_CHAR;
  65. return o > 0 && o < TRIE_SIZE;
  66. }
  67. OP_STATUS NodePointer::Goto(char position)
  68. {
  69. BSCache::Item::DiskId branch_no;
  70. BSCache::Item *bi;
  71. TrieBranch *b;
  72. #ifdef _DEBUG
  73. //if (HasChild() && branch->modified)
  74. // This produces false asserts //CHECK_INTEGRITY(branch, act);
  75. #endif
  76. if (HasDiskChild())
  77. {
  78. branch_no = GetDiskChild();
  79. act->Release(branch);
  80. RETURN_IF_ERROR(act->Load(&bi, (OpFileLength)(branch_no)));
  81. branch = (TrieBranch *)bi;
  82. offset = 0;
  83. }
  84. else if (HasMemoryChild())
  85. {
  86. b = GetMemoryChild();
  87. act->Release(branch);
  88. act->Load(&bi, b);
  89. branch = (TrieBranch *)bi;
  90. offset = 0;
  91. }
  92. if (!ValidChar(position))
  93. return OpStatus::ERR_OUT_OF_RANGE;
  94. parent = offset;
  95. offset = GetOffset() + (unsigned char)position - FIRST_CHAR;
  96. return OpStatus::OK;
  97. }
  98. OP_STATUS NodePointer::NewNode(int position)
  99. {
  100. int i, rndpos;
  101. TrieBranch *b;
  102. BSCache::Item *tmp_b;
  103. position -= FIRST_CHAR;
  104. rndpos = act->Random() % (MAX_OFFSET_VALUE - 1) + 1;//(MAX_OFFSET_VALUE - 2 - FIRST_CHAR) + 1 + FIRST_CHAR;
  105. for (i = rndpos; i <= MAX_OFFSET_VALUE; ++i)
  106. {
  107. if (branch->IsFree(i))
  108. {
  109. SetOffset(i - position);
  110. CHECK_INTEGRITY(branch, act);
  111. return OpStatus::OK;
  112. }
  113. }
  114. for (i = rndpos; i > 0; --i)
  115. {
  116. if (branch->IsFree(i))
  117. {
  118. SetOffset(i - position);
  119. CHECK_INTEGRITY(branch, act);
  120. return OpStatus::OK;
  121. }
  122. }
  123. // no free node found
  124. RETURN_IF_ERROR(act->Create(&tmp_b, branch, offset));
  125. b = (TrieBranch *)tmp_b;
  126. SetMemoryChild(b);
  127. act->Release(b);
  128. CHECK_INTEGRITY(branch, act);
  129. CHECK_INTEGRITY(b, act);
  130. return OpStatus::OK;
  131. }
  132. int NodePointer::GetChildrenSize(int node_parent)
  133. {
  134. int size = 0;
  135. int i, sp, ep;
  136. if (node_parent == 0)
  137. node_parent = parent;
  138. if (branch->HasChild(node_parent) || branch->IsFinal(node_parent))
  139. return size;
  140. sp = branch->GetOffset(node_parent);
  141. ep = sp + 256;
  142. if (sp <= 0)
  143. sp = 1;
  144. if (ep > TRIE_SIZE)
  145. ep = TRIE_SIZE;
  146. for (i = sp; i < ep; ++i)
  147. {
  148. if (!branch->IsFree(i) && branch->GetParent(i) == node_parent)
  149. ++size;
  150. }
  151. return size * 9 + size / 3; // a guess based on statistics
  152. }
// Moves the whole subtree below move_parent out of the current branch into a
// newly created branch, which becomes move_parent's memory child.
//
// Steps, as implemented below:
//  1. Create the new branch (dst) for move_parent; src refers to the current
//     branch.
//  2. If move_parent is the parent of the node this pointer stands on, reserve
//     the slot of the current character in dst (marked final) so that it is
//     not taken while nodes are moved; free_node remembers this.
//  3. Copy every direct child of move_parent into dst at (slot - old offset of
//     move_parent). Memory children referenced by such a node are flushed
//     first, together with the not yet stored branches preceding them in the
//     act->m_head list.
//  4. Move the descendants of each copied node with MoveChildren() and free
//     the source slots.
//  5. Try to merge child branches of dst into dst (result of Merge() is
//     ignored - it is an optimization only).
//  6. If a slot is still reserved, go to it and free it.
//
// move_parent: node of the current branch whose children are moved.
// Returns OpStatus::OK or the first error from Create/Flush/MoveChildren/Goto.
// NOTE(review): on an error return the trie may be left partially moved;
// nothing visible here rolls the operation back.
OP_STATUS NodePointer::Move(int move_parent)
{
	TrieBranch *b;
	BSCache::Item *bi, *flush_b;
	int move_offset, min_offset, max_offset;
	int i;
	unsigned char c;
	NodePointer src(act), dst(act);
	BOOL free_node;

	RETURN_IF_ERROR(act->Create(&bi, branch, move_parent));
	b = (TrieBranch *)bi;

	// dst takes its own reference to the new branch, so the one from Create can go
	dst.Reset(b);
	act->Release(b);

	src.Reset(branch);
	src.parent = parent;

	free_node = FALSE;

	// character of the node this pointer currently stands on
	c = offset - src.branch->GetOffset(parent) + FIRST_CHAR;

	move_offset = src.branch->GetOffset(move_parent);

	if (move_parent == parent) // in this case the offset will be 0
	{
		free_node = TRUE;
		dst.branch->SetFinal(c - FIRST_CHAR); // reserve this position
		dst.branch->SetParent(c - FIRST_CHAR, parent);
	}

	// range of slots which may contain children of move_parent
	min_offset = src.branch->GetOffset(move_parent);
	if ((max_offset = min_offset + 256 - FIRST_CHAR) > TRIE_SIZE)
		max_offset = TRIE_SIZE;
	if (min_offset <= 0)
		min_offset = 1;

	src.branch->SetMemoryChild(move_parent, dst.branch);

	// first pass: copy the direct children of move_parent to the new branch
	for (i = min_offset; i < max_offset; ++i)
	{
		if (!src.branch->IsFree(i) && src.branch->GetParent(i) == move_parent)
		{
			if (src.branch->HasMemoryChild(i))
			{
				// flush the unsaved branches in the list up to the memory child,
				// then the memory child itself
				flush_b = act->m_head;
				while (flush_b != NULL && flush_b != src.branch->GetMemoryChild(i))
				{
					if (flush_b != dst.branch && flush_b->disk_id < 0)
						RETURN_IF_ERROR(Flush((TrieBranch *)flush_b));
					flush_b = flush_b->previous;
				}
				RETURN_IF_ERROR(Flush(src.branch->GetMemoryChild(i)));
			}

			TrieBranch::MoveNode(dst.branch, i - move_offset, src.branch, i, FALSE);
			// the moved nodes hang directly below position 0 of the new branch
			dst.branch->SetParent(i - move_offset, 0);
		}
	}

	// second pass: move the grandchildren and free the source slots
	for (i = min_offset; i < max_offset; ++i)
	{
		// use src.branch since this->branch may change in MoveChildren
		if (!src.branch->IsFree(i) && src.branch->GetParent(i) == move_parent)
		{
			if (!src.branch->IsFinal(i) && !src.branch->HasChild(i))
			{
				dst.offset = i - move_offset;
				src.offset = i;
				RETURN_IF_ERROR(MoveChildren(src, dst, c, &free_node));
			}
			src.branch->SetFree(i);
		}
	}

	for (i = 1; i < TRIE_SIZE; ++i)
	{
		if ((dst.branch->HasDiskChild(i) && src.branch->disk_id != dst.branch->GetDiskChild(i)) ||
			(dst.branch->HasMemoryChild(i) && src.branch != dst.branch->GetMemoryChild(i)))
			dst.Merge(i); // rather don't merge if it was the current branch
	}

	if (free_node)
	{
		// the reserved slot was not consumed by MoveChildren; release it again
		offset = parent;
		RETURN_IF_ERROR(Goto(c));
		OP_ASSERT(IsFree() || (IsFinal() && !IsWord() && !HasChild()));
		SetFree();
	}

#ifdef _DEBUG
	++act->collision_count;
#endif

	CHECK_INTEGRITY(src.branch, act);
	CHECK_INTEGRITY(dst.branch, act);

	return OpStatus::OK;
}
// Recursively moves the children of src.offset (in src.branch) below
// dst.offset (in dst.branch). Helper of Move().
//
// input:
// src.branch = source branch (branch before Move(...) operation)
// src.offset = parent, whose children should be moved
// src.parent = source parent (parent before Move(...) operation)
// dst.branch = branch to move the children to
// dst.offset = destination parent (new parent of the children)
// next_char = next character on input
// output:
// parent_moved = source parent (parent before Move(...) operation) has been moved and position for next_char is reserved
// changes:
// this->branch might be changed (marked by *parent_moved)
// this->parent might be changed (marked by *parent_moved)
// this->offset should be rather considered as undefined if parent was moved
//
// src and dst are passed by value, so the caller's pointers are not modified.
// If the children do not fit into dst.branch, everything is flushed and yet
// another branch is created below dst.offset to hold them.
// Returns OpStatus::OK or the first error from Parse/Flush/Create.
OP_STATUS NodePointer::MoveChildren(NodePointer src, NodePointer dst, unsigned char next_char, BOOL *parent_moved)
{
	Fitter f;
	int i, move_offset, reserved_node;
	TrieBranch *b;
	OP_STATUS err;
	BSCache::Item *bi, *flush_b;

	reserved_node = 0;

	// collect the children of src.offset; on OOM release cached branches and retry once
	if (OpStatus::IsError(err = f.Parse(src.branch, src.offset)))
	{
		if (err == OpStatus::ERR_NO_MEMORY && act->m_cache_count > 0)
		{
			RETURN_IF_ERROR(act->Flush(ACT::ReleaseAll));
			RETURN_IF_ERROR(f.Parse(src.branch, src.offset));
		}
		else return err;
	}

	// the node being moved is the parent of the current position:
	// reserve room for the next input character among its children
	if (!*parent_moved && src.offset == parent)
	{
		reserved_node = next_char;
		f.AddNode(reserved_node);
	}

	if ((move_offset = f.Fit(dst.branch, act->Random() % (MAX_OFFSET_VALUE - 1) + 1)) <= 0)
	{
		// flush everything, since dst.branch might contain references to memory branches at this point
		RETURN_IF_ERROR(act->Flush());

		// no room in dst.branch: continue in a new branch below dst.offset
		RETURN_IF_ERROR(act->Create(&bi, dst.branch, dst.offset));
		b = (TrieBranch *)bi;
		dst.SetMemoryChild(b);
		dst.Reset(b);
		act->Release(b);

		move_offset = f.GetOrigin() - src.GetOffset(); // code of the first character
	}
	else {
		// set the offset of the destination parent so that the characters map
		// onto the slots chosen by Fit()
		if (f.GetSize() > 0)
			dst.SetOffset(move_offset - f.GetOrigin() + src.GetOffset());
		else {
			OP_ASSERT(reserved_node != 0);
			dst.SetOffset(move_offset - reserved_node + FIRST_CHAR);
		}
	}

	// copy the children to their new slots
	for (i = 0; i < f.GetSize(); ++i)
	{
		int from = f.GetOrigin() + f.GetOffset(i);
		int to = move_offset + f.GetOffset(i);

		OP_ASSERT(from < TRIE_SIZE && from > 0);

		if (src.branch->HasMemoryChild(from))
		{
			// flush the unsaved branches in the list up to the memory child,
			// then the memory child itself
			flush_b = act->m_head;
			while (flush_b != NULL && flush_b != src.branch->GetMemoryChild(from))
			{
				if (flush_b != dst.branch && flush_b->disk_id < 0)
					RETURN_IF_ERROR(Flush((TrieBranch *)flush_b));
				flush_b = flush_b->previous;
			}
			RETURN_IF_ERROR(Flush(src.branch->GetMemoryChild(from)));
		}

		TrieBranch::MoveNode(dst.branch, to, src.branch, from, FALSE);
		dst.branch->SetParent(to, dst.offset);
	}

	if (reserved_node != 0)
	{
		// re-seat this pointer onto the reserved slot in the destination branch
		*parent_moved = TRUE;
		Reset(dst.branch);
		parent = dst.offset;
		offset = move_offset + f.GetAOffset();
		dst.branch->SetFinal(offset);
		dst.branch->SetParent(offset, dst.offset);
	}

	// recurse into the children which have children of their own in this
	// branch, then free the source slots
	for (i = 0; i < f.GetSize(); ++i)
	{
		src.offset = f.GetOrigin() + f.GetOffset(i);
		if (!src.IsFinal() && !src.HasChild())
		{
			dst.offset = move_offset + f.GetOffset(i);
			RETURN_IF_ERROR(MoveChildren(src, dst, next_char, parent_moved));
		}
		src.SetFree();
	}

	return OpStatus::OK;
}
// Collects the words stored below node t as strings.
//
// result:  array of string buffers; on entry result[*count - 1] holds the
//          prefix leading to t. Entries are extended in place, reallocated or
//          replaced by strings returned from the tail callback.
//          NOTE(review): the buffers appear to be sized in steps of 32
//          characters (+1 for the terminator) - confirm against the caller,
//          which has to allocate the first one accordingly.
// count:   in/out number of used entries in result.
// t:       node to start from (passed by value, modified locally).
// max:     capacity limit for result.
// Returns OpStatus::OK (also when the limit was reached), or an error from
// Reset/Parse/the tail callback, or ERR_NO_MEMORY.
OP_STATUS NodePointer::GetSubTree(char **result, int *count, NodePointer t, int max)
{
	Fitter f;
	int i;
	int prefix_len, prefix_pos;
	int char_offset;
	char *copy_buf;

	// follow a child branch if there is one; characters in a child branch are
	// computed relative to offset 0
	if (t.HasDiskChild())
	{
		RETURN_IF_ERROR(t.Reset(t.GetDiskChild()));
		char_offset = 0;
	}
	else if (t.HasMemoryChild())
	{
		t.Reset(t.GetMemoryChild());
		char_offset = 0;
	}
	else char_offset = t.GetOffset();

	RETURN_IF_ERROR(f.Parse(t.branch, t.offset));

	prefix_pos = *count - 1;
	prefix_len = (int)op_strlen(result[prefix_pos]);

	// the prefix fills its buffer completely: grow it by another 32 characters
	// before a character is appended
	if (f.GetSize() > 0 && prefix_len > 0 && prefix_len % 32 == 0)
	{
		RETURN_OOM_IF_NULL(copy_buf = OP_NEWA(char, 32 * (prefix_len / 32 + 1) + 1));
		op_strcpy(copy_buf, result[prefix_pos]);
		OP_DELETEA(result[prefix_pos]);
		result[prefix_pos] = copy_buf;
	}

	for (i = 0; i < f.GetSize() && *count <= max; ++i)
	{
		if (i > 0)
		{
			// every further child needs a string of its own, started from the prefix
			if (*count >= max)
				return OpStatus::OK;
			RETURN_OOM_IF_NULL(copy_buf = OP_NEWA(char, 32 * (prefix_len / 32 + 1) + 1));
			op_strncpy(copy_buf, result[prefix_pos], prefix_len);
			result[(*count)++] = copy_buf;
		}

		t.offset = f.GetOrigin() + f.GetOffset(i);

		// append the character of this child to the last string
		result[*count - 1][prefix_len] = t.offset - char_offset + FIRST_CHAR;
		result[*count - 1][prefix_len + 1] = 0;

		if (t.IsWord() && !t.IsFinal())
		{
			// a word which is also a prefix of longer words: store the word
			// in the current entry and keep the working buffer in the next one
			if (*count + 1 >= max)
				return OpStatus::OK;
			if (t.act->m_TailCallback == NULL)
			{
				size_t len = op_strlen(result[*count - 1]) + 1;
				RETURN_OOM_IF_NULL(copy_buf = OP_NEWA(char, len));
				op_strcpy(copy_buf, result[*count - 1]);
				result[*count] = result[*count - 1];
				result[*count - 1] = copy_buf;
			}
			else {
				RETURN_IF_ERROR(t.act->m_TailCallback(&copy_buf, t.GetId(), t.act->m_callback_val));
				result[*count] = result[*count - 1];
				result[*count - 1] = copy_buf;
			}
			++*count;
		}

		// final node which is not marked as a word: the string is supplied by
		// the tail callback and replaces the working buffer
		if (t.act->m_TailCallback != NULL && !t.IsWord() && t.IsFinal())
		{
			RETURN_IF_ERROR(t.act->m_TailCallback(&copy_buf, t.GetId(), t.act->m_callback_val));
			OP_DELETEA(result[*count - 1]);
			result[*count - 1] = copy_buf;
		}

		if (!t.IsFinal())
		{
			RETURN_IF_ERROR(GetSubTree(result, count, t, max));
		}
	}

	return OpStatus::OK;
}
  403. OP_STATUS NodePointer::GetSubTree(ACT::WordID *result, int *count, NodePointer t, int max)
  404. {
  405. Fitter f;
  406. int i;
  407. if (*count >= max)
  408. return OpStatus::OK;
  409. if (t.HasDiskChild())
  410. {
  411. RETURN_IF_ERROR(t.Reset(t.GetDiskChild()));
  412. }
  413. else if (t.HasMemoryChild())
  414. {
  415. t.Reset(t.GetMemoryChild());
  416. }
  417. RETURN_IF_ERROR(f.Parse(t.branch, t.offset));
  418. for (i = 0; i < f.GetSize(); ++i)
  419. {
  420. t.offset = f.GetOrigin() + f.GetOffset(i);
  421. if (t.IsWord() || (t.act->m_TailCallback != NULL && t.IsFinal()))
  422. {
  423. result[*count] = t.GetId();
  424. if (++*count >= max)
  425. return OpStatus::OK;
  426. }
  427. if (!t.IsFinal())
  428. {
  429. RETURN_IF_ERROR(GetSubTree(result, count, t, max));
  430. if (*count >= max)
  431. return OpStatus::OK;
  432. }
  433. }
  434. return OpStatus::OK;
  435. }
  436. OP_STATUS NodePointer::GetFirstEntry(ACT::PrefixResult &result, NodePointer t)
  437. {
  438. do
  439. {
  440. Fitter f;
  441. int prefix_len;
  442. int char_offset;
  443. char *copy_buf;
  444. if (t.HasDiskChild())
  445. {
  446. RETURN_IF_ERROR(t.Reset(t.GetDiskChild()));
  447. char_offset = 0;
  448. }
  449. else if (t.HasMemoryChild())
  450. {
  451. t.Reset(t.GetMemoryChild());
  452. char_offset = 0;
  453. }
  454. else char_offset = t.GetOffset();
  455. RETURN_IF_ERROR(f.Parse(t.branch, t.offset));
  456. if (f.GetSize() <= 0)
  457. {
  458. OP_ASSERT(0); // this branch doesn't contain any word
  459. result.id = 0;
  460. return OpStatus::OK;
  461. }
  462. prefix_len = (int)op_strlen(result.utf8_word);
  463. if (prefix_len > 0 && prefix_len % 32 == 0)
  464. {
  465. RETURN_OOM_IF_NULL(copy_buf = OP_NEWA(char, 32 * (prefix_len / 32 + 1) + 1));
  466. op_strcpy(copy_buf, result.utf8_word);
  467. OP_DELETEA(result.utf8_word);
  468. result.utf8_word = copy_buf;
  469. }
  470. t.offset = f.GetOrigin() + f.GetOffset(0);
  471. result.utf8_word[prefix_len] = t.offset - char_offset + FIRST_CHAR;
  472. result.utf8_word[prefix_len + 1] = 0;
  473. if (t.IsWord() || (t.act->m_TailCallback != NULL && t.IsFinal()))
  474. {
  475. if (t.act->m_TailCallback != NULL)
  476. {
  477. RETURN_IF_ERROR(t.act->m_TailCallback(&copy_buf, t.GetId(), t.act->m_callback_val));
  478. OP_DELETEA(result.utf8_word);
  479. result.utf8_word = copy_buf;
  480. }
  481. result.id = t.GetId();
  482. return OpStatus::OK;
  483. }
  484. OP_ASSERT(!t.IsFinal());
  485. }
  486. while (1);
  487. }
  488. OP_BOOLEAN NodePointer::GetNextEntry(ACT::PrefixResult &result, NodePointer t, const char *prev_str)
  489. {
  490. Fitter f;
  491. int i;
  492. int prefix_len;
  493. int char_offset;
  494. char *copy_buf;
  495. OP_BOOLEAN rv;
  496. ACT::SkipNonPrintableChars(prev_str);
  497. if (*prev_str == 0) // shouldn't happen
  498. {
  499. if (t.IsFinal())
  500. return OpBoolean::IS_FALSE;
  501. RETURN_IF_ERROR(GetFirstEntry(result, t));
  502. return result.id == 0 ? OpBoolean::IS_FALSE : OpBoolean::IS_TRUE;
  503. }
  504. if (t.HasDiskChild())
  505. {
  506. RETURN_IF_ERROR(t.Reset(t.GetDiskChild()));
  507. char_offset = 0;
  508. }
  509. else if (t.HasMemoryChild())
  510. {
  511. t.Reset(t.GetMemoryChild());
  512. char_offset = 0;
  513. }
  514. else char_offset = t.GetOffset();
  515. RETURN_IF_ERROR(f.Parse(t.branch, t.offset));
  516. prefix_len = (int)op_strlen(result.utf8_word);
  517. // avoid stack overflow
  518. if (prefix_len > 100)
  519. return OpBoolean::IS_FALSE;
  520. if (f.GetSize() > 0 && prefix_len > 0 && prefix_len % 32 == 0)
  521. {
  522. RETURN_OOM_IF_NULL(copy_buf = OP_NEWA(char, 32 * (prefix_len / 32 + 1) + 1));
  523. op_strcpy(copy_buf, result.utf8_word);
  524. OP_DELETEA(result.utf8_word);
  525. result.utf8_word = copy_buf;
  526. }
  527. for (i = 0; i < f.GetSize(); ++i)
  528. {
  529. t.offset = f.GetOrigin() + f.GetOffset(i);
  530. if (*prev_str == t.offset - char_offset + FIRST_CHAR)
  531. break;
  532. }
  533. if (i >= f.GetSize())
  534. return OpBoolean::IS_FALSE;
  535. if (!t.IsFinal())
  536. {
  537. result.utf8_word[prefix_len] = t.offset - char_offset + FIRST_CHAR;
  538. result.utf8_word[prefix_len + 1] = 0;
  539. if (prev_str[1] != 0)
  540. {
  541. if ((rv = GetNextEntry(result, t, prev_str + 1)) != OpBoolean::IS_FALSE)
  542. return rv;
  543. }
  544. else {
  545. RETURN_IF_ERROR(GetFirstEntry(result, t)); // can fail when the string is too long
  546. return result.id == 0 ? OpBoolean::IS_FALSE : OpBoolean::IS_TRUE;
  547. }
  548. }
  549. ++i;
  550. while (i < f.GetSize())
  551. {
  552. t.offset = f.GetOrigin() + f.GetOffset(i);
  553. result.utf8_word[prefix_len] = t.offset - char_offset + FIRST_CHAR;
  554. result.utf8_word[prefix_len + 1] = 0;
  555. if (t.IsWord())
  556. {
  557. result.id = t.GetId();
  558. return OpBoolean::IS_TRUE;
  559. }
  560. if (t.act->m_TailCallback != NULL && !t.IsWord() && t.IsFinal())
  561. {
  562. RETURN_IF_ERROR(t.act->m_TailCallback(&copy_buf, t.GetId(), t.act->m_callback_val));
  563. OP_DELETEA(result.utf8_word);
  564. result.utf8_word = copy_buf;
  565. result.id = t.GetId();
  566. return OpBoolean::IS_TRUE;
  567. }
  568. if (!t.IsFinal())
  569. {
  570. RETURN_IF_ERROR(GetFirstEntry(result, t));
  571. return result.id == 0 ? OpBoolean::IS_FALSE : OpBoolean::IS_TRUE;
  572. }
  573. }
  574. return OpBoolean::IS_FALSE;
  575. }
  576. OP_STATUS NodePointer::Flush(TrieBranch *branch)
  577. {
  578. BSCache::Item::DiskId new_id;
  579. if (act->m_flush_mode != BSCache::JournalOnly && act->m_flush_mode != BSCache::JournalAll)
  580. return branch->Flush(&(act->m_storage));
  581. if (branch->disk_id > 0)
  582. return OpStatus::OK;
  583. new_id = (BSCache::Item::DiskId)(act->m_storage.Reserve() / act->m_storage.GetBlockSize());
  584. if (new_id == 0)
  585. return OpStatus::ERR_NO_DISK;
  586. branch->id_reserved = TRUE;
  587. branch->OnIdChange(new_id, branch->disk_id);
  588. branch->disk_id = new_id;
  589. branch->journalled = TRUE;
  590. return OpStatus::OK;
  591. }
// Tries to relocate all children of node_parent within the current branch so
// that there is also a free slot for one more child.
//
// node_parent: node whose children are moved; 0 means this->parent.
// next_char:   character which needs a slot among the children; if 0 and
//              node_parent is this->parent, the character of the current
//              position is used instead.
// Returns TRUE if the children were moved; FALSE if they couldn't be parsed or
// no suitable free area was found (the branch is then left unchanged).
// On success this->offset/this->parent are updated when they referred to the
// moved nodes.
BOOL NodePointer::Reposition(int node_parent, char next_char)
{
	Fitter f;
	OP_STATUS err;
	int i, j, move_offset, max_offset;
#ifdef HTML_EXPORT
	HTMLExporter exp;
#endif

	if (node_parent == 0)
		node_parent = parent;

	OP_ASSERT(node_parent > 0 && node_parent < TRIE_SIZE);

	// collect the children; on OOM release cached branches and retry once
	if (OpStatus::IsError(err = f.Parse(branch, node_parent)))
	{
		if (err == OpStatus::ERR_NO_MEMORY && act->m_cache_count > 0)
		{
			if (OpStatus::IsError(act->Flush(ACT::ReleaseAll)) ||
				OpStatus::IsError(f.Parse(branch, node_parent)))
				return FALSE;
		}
		else return FALSE;
	}

	OP_ASSERT(f.GetSize() > 0);

	// reserve a slot for the additional character
	if (next_char != 0)
		f.AddNode(next_char);
	else if (node_parent == parent)
		f.AddNode(offset - branch->GetOffset(parent) + FIRST_CHAR);

	if ((move_offset = f.Fit(branch, act->Random() % (MAX_OFFSET_VALUE - 1) + 1)) <= 0)
	{
#ifdef HTML_EXPORT
		// debugging aid: dump the occupancy of the branch which couldn't take the nodes
		exp.SetClass("free", "white");
		exp.SetClass("occupied", "aqua");
		exp.SetClass("active", "maroon");
		exp.SetClass("conflict", "red");
		exp.SetClass("act_w_child", "fuchsia");
		exp.Open(UNI_L("modules/search_engine/tests/data/failed.html"), 1, 8);
		for (i = 1; i < TRIE_SIZE; ++i)
		{
			if (branch->IsFree(i))
			{
				exp.Add("free");
				continue;
			}
			if (branch->GetParent(i) == node_parent &&
				branch->HasChild(i))
			{
				exp.Add("act_w_child");
				continue;
			}
			if (branch->GetParent(i) == node_parent)
			{
				exp.Add("active");
				continue;
			}
			if (i == branch->GetOffset(node_parent) + (unsigned char)next_char - FIRST_CHAR)
			{
				exp.Add("conflict");
				continue;
			}
			exp.Add("occupied");
		}
		exp.Close();
		if (TRIE_SIZE - (int)branch->NumFilled() + f.GetSize() > TRIE_SIZE / 2) // fill factor less than x%
			++i; // just for a breakpoint
#endif
		return FALSE;
	}

	// the current position is the reserved node: follow it to its new slot
	if (node_parent == parent)
		offset = move_offset + f.GetAOffset();

	// shift the offset of node_parent by the distance the children are moved
	branch->SetOffset(node_parent, move_offset - f.GetOrigin() + branch->GetOffset(node_parent));

	// set the children's parent of the nodes being moved
	for (i = 0; i < f.GetSize(); ++i)
	{
		// final nodes and nodes continuing in another branch have no children here
		if (branch->HasChild(f.GetOrigin() + f.GetOffset(i)) || branch->IsFinal(f.GetOrigin() + f.GetOffset(i)))
			continue;

		if (f.GetOrigin() + f.GetOffset(i) == parent)
			parent = move_offset + f.GetOffset(i);

		// range of slots which may contain children of the moved node
		j = branch->GetOffset(f.GetOrigin() + f.GetOffset(i));
		if ((max_offset = j + 256 - FIRST_CHAR) > TRIE_SIZE)
			max_offset = TRIE_SIZE;
		if (j <= 0)
			j = 1;

		while (j < max_offset)
		{
			if (branch->GetParent(j) == f.GetOrigin() + f.GetOffset(i))
				branch->SetParent(j, move_offset + f.GetOffset(i));
			++j;
		}
	}

	// Source and destination are in the same branch and the ranges may
	// overlap: when moving upwards start with the last node, when moving
	// downwards with the first one, so that no node lands on a slot which is
	// still occupied by a node waiting to be moved.
	if (move_offset > f.GetOrigin())
	{
		for (i = f.GetSize() - 1; i >= 0; --i)
		{
			int from = f.GetOrigin() + f.GetOffset(i);
			int to = move_offset + f.GetOffset(i);
			TrieBranch::MoveNode(branch, to, branch, from, TRUE);
		}
	}
	else {
		for (i = 0; i < f.GetSize(); ++i)
		{
			int from = f.GetOrigin() + f.GetOffset(i);
			int to = move_offset + f.GetOffset(i);
			TrieBranch::MoveNode(branch, to, branch, from, TRUE);
		}
	}

#ifdef _DEBUG
	++act->collision_count;
#endif

	CHECK_INTEGRITY(branch, act);

	return TRUE;
}
// Tries to pull the complete child branch referenced by node branch_parent
// into the current branch.
//
// branch_parent: node of the current branch which has a disk or memory child.
// Returns TRUE if all nodes of the child branch were moved here and the child
// branch was unlinked; FALSE if the child couldn't be loaded or parsed, if it
// refers to further child branches, or if its nodes don't fit into the
// current branch. Nothing is changed when FALSE is returned.
BOOL NodePointer::Merge(int branch_parent)
{
	Fitter f;
	OP_STATUS err;
	int i, move_offset, diff_offset;
	BSCache::Item *bi;
	TrieBranch *b;
#ifdef HTML_EXPORT
	HTMLExporter exp;
#endif

	// get the child branch
	if (branch->HasMemoryChild(branch_parent))
	{
		act->Load(&bi, branch->GetMemoryChild(branch_parent));
	}
	else
	{
		RETURN_VALUE_IF_ERROR(act->Load(&bi, (OpFileLength)(branch->GetDiskChild(branch_parent))), FALSE);
	}
	b = (TrieBranch *)bi;

#ifdef HTML_EXPORT
	// debugging aid: dump the occupancy of both branches
	exp.SetClass("free", "white");
	exp.SetClass("occupied", "aqua");
//	exp.SetClass("moved", "red");
	exp.Open(UNI_L("modules/search_engine/tests/data/merge.html"), 1, 8);
	for (i = 1; i < TRIE_SIZE; ++i)
	{
		if (branch->IsFree(i))
		{
			exp.Add("free");
			continue;
		}
		exp.Add("occupied");
	}
	exp.NewLine();
	for (i = 1; i < TRIE_SIZE; ++i)
	{
		if (b->IsFree(i))
		{
			exp.Add("free");
			continue;
		}
		exp.Add("occupied");
	}
	exp.Close();
#endif

	// collect all nodes of the child branch; on OOM release cached branches
	// and retry once. Every failure path has to release b again.
	if (OpStatus::IsError(err = f.ParseAll(b)))
	{
		if (err == OpStatus::ERR_NO_MEMORY && act->m_cache_count > 0)
		{
			if (OpStatus::IsError(act->Flush(ACT::ReleaseAll)) ||
				OpStatus::IsError(f.ParseAll(b)))
			{
				act->Release(b);
				return FALSE;
			}
		}
		else {
			act->Release(b);
			return FALSE;
		}
	}

	if ((move_offset = f.Fit(branch, act->Random() % (MAX_OFFSET_VALUE - 1) + 1)) <= 0)
	{
		act->Release(b);
		return FALSE;
	}

	// distance between a slot in the child branch and its new slot here
	diff_offset = move_offset - f.GetOrigin();

	for (i = f.GetSize() - 1; i >= 0; --i)
	{
		int from = f.GetOrigin() + f.GetOffset(i);
		int to = move_offset + f.GetOffset(i);

		TrieBranch::MoveNode(branch, to, b, from, TRUE);

		// nodes directly below position 0 of the child branch now hang below
		// branch_parent, all others keep their parent, shifted by the distance
		if (branch->GetParent(to) == 0)
			branch->SetParent(to, branch_parent);
		else branch->SetParent(to, branch->GetParent(to) + diff_offset);

		// offsets pointing to children within the same branch move along
		if (!branch->HasChild(to) && !branch->IsFinal(to))
			branch->SetOffset(to, branch->GetOffset(to) + diff_offset);
	}

	// the child branch isn't needed anymore
	act->Unlink(b);
	act->Release(b);

	// branch_parent has its children in this branch from now on
	branch->SetOffset(branch_parent, diff_offset);

	return TRUE;
}
  786. OP_STATUS Fitter::Parse(TrieBranch *b, int parent)
  787. {
  788. int i, j, d, max_offset;
  789. this->src = b;
  790. this->parent = parent;
  791. this->reserved_node = 0;
  792. size = 0;
  793. d = 0;
  794. i = b->GetOffset(parent);
  795. if ((max_offset = i + 256 - FIRST_CHAR) > TRIE_SIZE)
  796. max_offset = TRIE_SIZE;
  797. if (i <= 0)
  798. i = 1;
  799. while (i < max_offset)
  800. {
  801. if (!b->IsFree(i) && b->GetParent(i) == parent)
  802. {
  803. d = i++;
  804. ++size;
  805. break;
  806. }
  807. ++i;
  808. }
  809. while (i < max_offset)
  810. {
  811. if (!b->IsFree(i) && b->GetParent(i) == parent)
  812. ++size;
  813. ++i;
  814. }
  815. if (size > 0)
  816. {
  817. RETURN_OOM_IF_NULL(distances = OP_NEWA(int, size));
  818. distances[0] = d;
  819. for (i = d + 1, j = 1; j < size; ++i)
  820. {
  821. if (!b->IsFree(i) && b->GetParent(i) == parent)
  822. {
  823. distances[j] = i - d;
  824. ++j;
  825. }
  826. }
  827. }
  828. return OpStatus::OK;
  829. }
  830. OP_STATUS Fitter::ParseAll(TrieBranch *b)
  831. {
  832. int i, j, d;
  833. this->src = b;
  834. this->parent = -1;
  835. this->reserved_node = 0;
  836. if ((size = b->NumFilled()) == 0)
  837. return OpStatus::OK;
  838. d = 0;
  839. i = 1;
  840. RETURN_OOM_IF_NULL(distances = OP_NEWA(int, size));
  841. while (i < TRIE_SIZE)
  842. {
  843. if (!b->IsFree(i))
  844. {
  845. // merging branches with mem. children may lead to crash, abort
  846. if (b->HasChild(i))
  847. {
  848. OP_DELETEA(distances);
  849. distances = NULL;
  850. size = 0;
  851. return OpStatus::ERR;
  852. }
  853. d = i++;
  854. break;
  855. }
  856. ++i;
  857. }
  858. if (size > 0)
  859. {
  860. distances[0] = d;
  861. for (i = d + 1, j = 1; j < size; ++i)
  862. {
  863. if (!b->IsFree(i))
  864. {
  865. // merging branches with mem. children may lead to crash, abort
  866. if (b->HasChild(i))
  867. {
  868. OP_DELETEA(distances);
  869. distances = NULL;
  870. size = 0;
  871. return OpStatus::ERR;
  872. }
  873. distances[j] = i - d;
  874. ++j;
  875. }
  876. }
  877. OP_ASSERT(j == size);
  878. }
  879. return OpStatus::OK;
  880. }
  881. void Fitter::AddNode(char node)
  882. {
  883. if (size <= 0)
  884. reserved_node = 0;
  885. else
  886. {
  887. OP_ASSERT(parent >= 0);
  888. if (parent >= 0)
  889. reserved_node = src->GetOffset(parent) + (unsigned char)node - FIRST_CHAR - distances[0];
  890. }
  891. }
// Searches branch b for a position where the parsed nodes can be placed.
//
// A position 'offset' is suitable if the slot itself, all slots
// offset + distances[1..size-1] and the slot offset + reserved_node are free
// in b. The search starts at 'start' and goes upwards first; if that fails it
// starts again and goes downwards.
//
// b:     branch to place the nodes into.
// start: position to start the search at (adjusted to keep the last node
//        inside the branch).
// Returns the position for the first node, or 0 if the nodes don't fit.
int Fitter::Fit(TrieBranch *b, int start)
{
	int offset, i, last;

	// distance of the last node from the first one
	if (size > 1)
		last = distances[size - 1];
	else last = 0;

	if (start + last >= TRIE_SIZE)
		start = TRIE_SIZE - 1 - last;

	offset = start;
	if (offset + reserved_node <= 0) // reserved_node may be negative
		offset = -reserved_node + 1;

	// upward search; i >= size after an iteration means that all slots were free
	i = -1;
	do {
		// find a free slot for the first node
		while (offset + last < TRIE_SIZE && !b->IsFree(offset))
			++offset;

		i = -1;
		if (offset + last >= TRIE_SIZE || offset + reserved_node >= TRIE_SIZE)
			break;

		// check the slots of the other nodes
		for (i = 1; i < size; ++i)
		{
			if (!b->IsFree(offset + distances[i]) /*&&
				(src != b || b->GetParent(offset + distances[i]) != parent)*/)
			{
				++offset;
				break;
			}
		}

		// check the slot of the reserved node
		if (i >= size && !b->IsFree(offset + reserved_node) /*&&
			(src != b || b->GetParent(offset + reserved_node) != parent)*/)
		{
			++offset;
			i = -1;
		}
	} while (i < size);

	if (offset + last < TRIE_SIZE && i >= size)
		return offset;

	// downward search from the start position
	offset = start;
	if (offset + last >= TRIE_SIZE)
		offset = TRIE_SIZE - 1 - last;
	if (offset + reserved_node >= TRIE_SIZE)
		offset = TRIE_SIZE - 1 - reserved_node;

	do {
		// find a free slot for the first node
		while (offset > 0 && !b->IsFree(offset))
			--offset;

		i = -1;
		if (offset <= 0 || offset + reserved_node <= 0)
			return 0;

		// check the slots of the other nodes
		for (i = 1; i < size; ++i)
		{
			if (!b->IsFree(offset + distances[i]) /*&&
				(src != b || b->data[offset + distances[i]].GetParent() != parent)*/)
			{
				--offset;
				break;
			}
		}

		// check the slot of the reserved node
		if (i >= size && !b->IsFree(offset + reserved_node) /*&&
			(src != b || b->GetParent(offset + reserved_node) != parent)*/)
		{
			--offset;
			i = -1;
		}
	} while (i < size);

	return offset; // not-found handled in the loop
}
  957. TrieBranch::TrieBranch(int id, TrieBranch *rbranch, int rnode, unsigned short nur)
  958. {
  959. disk_id = -id;
  960. parent_branch = rbranch;
  961. parent_pos = rnode;
  962. data[0].SetFlags(0);
  963. data[0].SetOffset(0);
  964. data[0].SetId(0);
  965. modified = TRUE;
  966. NUR_mark = nur;
  967. }
  968. TrieBranch::TrieBranch(OpFileLength id, unsigned short nur)
  969. {
  970. disk_id = (DiskId)id;
  971. parent_branch = NULL;
  972. parent_pos = 0;
  973. NUR_mark = nur;
  974. }
  975. OP_STATUS TrieBranch::Read(BlockStorage *storage)
  976. {
  977. if (!storage->Read(data, GetPackedSize(), (OpFileLength)(((OpFileLength)disk_id) * storage->GetBlockSize())))
  978. return OpStatus::ERR_NO_ACCESS;
  979. Unpack();
  980. return OpStatus::OK;
  981. }
  982. OP_STATUS TrieBranch::Write(BlockStorage *storage)
  983. {
  984. Pack();
  985. if (storage->Write(data, GetPackedSize(), (OpFileLength)(((OpFileLength)disk_id) * storage->GetBlockSize())) == 0)
  986. return OpStatus::ERR_NO_ACCESS;
  987. Unpack();
  988. return OpStatus::OK;
  989. }
  990. OP_STATUS TrieBranch::Flush(BlockStorage *storage)
  991. {
  992. OpFileLength did;
  993. #ifdef _DEBUG
  994. OP_ASSERT(modified);
  995. for (int i = 1; i < TRIE_SIZE; ++i)
  996. OP_ASSERT(!HasMemoryChild(i));
  997. // This produces false asserts //OP_ASSERT(CheckIntegrity(this, storage));
  998. #endif
  999. if (disk_id > 0 && !id_reserved)
  1000. {
  1001. Pack();
  1002. if (!storage->Update(data, GetPackedSize(), (OpFileLength)(((OpFileLength)disk_id) * storage->GetBlockSize())))
  1003. return OpStatus::ERR_NO_DISK;
  1004. Unpack();
  1005. }
  1006. else {
  1007. Pack();
  1008. if (id_reserved)
  1009. {
  1010. if ((did = storage->Write(data, GetPackedSize(), (OpFileLength)(((OpFileLength)disk_id) * storage->GetBlockSize())) / storage->GetBlockSize()) == 0)
  1011. {
  1012. Unpack();
  1013. return OpStatus::ERR_NO_DISK;
  1014. }
  1015. }
  1016. else {
  1017. if ((did = storage->Write(data, GetPackedSize()) / storage->GetBlockSize()) == 0)
  1018. {
  1019. Unpack();
  1020. return OpStatus::ERR_NO_DISK;
  1021. }
  1022. }
  1023. Unpack();
  1024. OnIdChange((DiskId)did, disk_id);
  1025. disk_id = (DiskId)did;
  1026. }
  1027. modified = FALSE;
  1028. return OpStatus::OK;
  1029. }
  1030. void TrieBranch::OnIdChange(DiskId new_id, DiskId old_id)
  1031. {
  1032. if (new_id == old_id)
  1033. return;
  1034. OP_ASSERT(parent_branch->GetMemoryChild(parent_pos) == this);
  1035. parent_branch->SetDiskChild(parent_pos, new_id);
  1036. }
  1037. void TrieNode::Pack(char *to)
  1038. {
  1039. OP_ASSERT(!HasMemoryChild());
  1040. op_memmove(to, &offset, 4);
  1041. op_memmove(to + 4, &flags_parent, 2);
  1042. op_memmove(to + 6, &id, 4);
  1043. }
  1044. void TrieNode::Unpack(char *from)
  1045. {
  1046. op_memmove(&id, from + 6, 4);
  1047. op_memmove(&flags_parent, from + 4, 2);
  1048. op_memmove(&offset, from, 4);
  1049. OP_ASSERT(!HasMemoryChild());
  1050. }
  1051. void TrieBranch::Pack()
  1052. {
  1053. for (int i = 0; i < TRIE_SIZE; ++i)
  1054. data[i].Pack((char*)data + i * TrieNode::GetPackedSize());
  1055. }
  1056. void TrieBranch::Unpack()
  1057. {
  1058. for (int i = TRIE_SIZE - 1; i >= 0; --i)
  1059. data[i].Unpack((char*)data + i * TrieNode::GetPackedSize());
  1060. }
  1061. void TrieBranch::MoveNode(TrieBranch* dst, int to, TrieBranch* src, int from, BOOL freeSrc)
  1062. {
  1063. OP_ASSERT(!src->IsFree(from));
  1064. OP_ASSERT(dst->IsFree(to));
  1065. OP_ASSERT(from > 0 && from < TRIE_SIZE);
  1066. OP_ASSERT(to > 0 && to < TRIE_SIZE);
  1067. if (src->HasMemoryChild(from))
  1068. {
  1069. src->GetMemoryChild(from)->parent_branch = dst;
  1070. src->GetMemoryChild(from)->parent_pos = to;
  1071. }
  1072. dst->data[to] = src->data[from];
  1073. dst->IncNumFilled();
  1074. dst->modified = TRUE;
  1075. if (freeSrc)
  1076. src->SetFree(from);
  1077. }
  1078. void TrieBranch::SetFlags(int i, int flags)
  1079. {
  1080. OP_ASSERT(i > 0 && i < TRIE_SIZE);
  1081. if (GetFlags(i) != flags)
  1082. {
  1083. BOOL wasFree = IsFree(i);
  1084. modified = TRUE;
  1085. data[i].SetFlags(flags);
  1086. if (!wasFree && IsFree(i))
  1087. DecNumFilled();
  1088. else if (wasFree && !IsFree(i))
  1089. IncNumFilled();
  1090. }
  1091. }
  1092. void TrieBranch::SetOffset(int i, int offset)
  1093. {
  1094. OP_ASSERT(i > 0 && i < TRIE_SIZE);
  1095. modified = TRUE;
  1096. SetFlags(i, GetFlags(i) & ~(TrieNode::Free | TrieNode::Child | TrieNode::Final));
  1097. data[i].SetOffset(offset);
  1098. }
  1099. void TrieBranch::SetDiskChild(int i, BSCache::Item::DiskId disk_child)
  1100. {
  1101. OP_ASSERT(i > 0 && i < TRIE_SIZE);
  1102. modified = TRUE;
  1103. SetFlags(i, (GetFlags(i) & ~(TrieNode::Free | TrieNode::Child | TrieNode::Final)) | TrieNode::DiskChild);
  1104. data[i].SetDiskChild(disk_child);
  1105. }
  1106. void TrieBranch::SetMemoryChild(int i, TrieBranch *mem_child)
  1107. {
  1108. OP_ASSERT(i > 0 && i < TRIE_SIZE);
  1109. modified = TRUE;
  1110. SetFlags(i, (GetFlags(i) & ~(TrieNode::Free | TrieNode::Child | TrieNode::Final)) | TrieNode::MemoryChild);
  1111. data[i].SetMemoryChild(mem_child);
  1112. }
  1113. void TrieBranch::SetParent(int i, int p)
  1114. {
  1115. OP_ASSERT(i > 0 && i < TRIE_SIZE);
  1116. modified = TRUE;
  1117. data[i].SetParent(p);
  1118. }
  1119. void TrieBranch::SetId(int i, ACT::WordID id)
  1120. {
  1121. OP_ASSERT(i > 0 && i < TRIE_SIZE);
  1122. modified = TRUE;
  1123. data[i].SetId(id);
  1124. }
  1125. int TrieNode::SwitchEndian(void *data, int size, void *user_arg)
  1126. {
  1127. BlockStorage::SwitchEndian((unsigned char *)data, 4);
  1128. BlockStorage::SwitchEndian((unsigned char *)data + 4, 2);
  1129. BlockStorage::SwitchEndian((unsigned char *)data + 6, 4);
  1130. return 10;
  1131. }
  #ifdef _DEBUG
  // Debug-only consistency check of a branch. Returns TRUE when every check
  // passes and FALSE at the first violation:
  //  - an unmodified branch must be identical, node by node, to what is read
  //    back from act->m_storage at its disk_id;
  //  - node 0 must have zero flags, parent and offset;
  //  - every used node must satisfy the child/offset/parent rules below;
  //  - the number of used nodes must equal NumFilled().
  BOOL TrieBranch::CheckIntegrity(const TrieBranch *branch, ACT *act)
  {
      int idx;

      if (!branch->modified) // This does not happen often. If it does, the same contents should be on disk
      {
          TrieBranch on_disk(branch->disk_id, 0);

          if (on_disk.disk_id < 0)
              return FALSE;

          RETURN_VALUE_IF_ERROR(on_disk.Read(&act->m_storage), FALSE);

          for (idx = 0; idx < TRIE_SIZE; ++idx)
          {
              if (!on_disk.data[idx].Equals(branch->data[idx]))
                  return FALSE;
          }
      }

      // Node 0 must be completely zeroed.
      const BOOL node0_clear =
          branch->GetFlags(0) == 0 &&
          branch->GetParent(0) == 0 &&
          branch->GetOffset(0) == 0;
      if (!node0_clear)
          return FALSE;

      int used_nodes = 0;

      for (idx = 1; idx < TRIE_SIZE; ++idx)
      {
          if (branch->IsFree(idx))
              continue;

          ++used_nodes;

          if (branch->HasMemoryChild(idx))
          {
              // Memory children are only allowed in a modified branch, and a
              // branch may not be its own child.
              if (!branch->modified)
                  return FALSE;
              if (branch->GetMemoryChild(idx) == branch)
                  return FALSE;
          }
          else if (branch->HasDiskChild(idx))
          {
              // A disk child excludes Final and must name a block other than
              // 0 and other than this branch itself.
              if (branch->IsFinal(idx) ||
                  branch->GetDiskChild(idx) == 0 ||
                  branch->GetDiskChild(idx) == branch->disk_id)
                  return FALSE;
          }
          else if (!branch->IsFinal(idx))
          {
              // Plain inner node: the offset has to stay within these bounds.
              if (branch->GetOffset(idx) + 255 - FIRST_CHAR <= 0)
                  return FALSE;
              if (branch->GetOffset(idx) >= TRIE_SIZE)
                  return FALSE;
          }

          // A final node that is not a word is only accepted when a tail
          // callback is installed.
          if (branch->IsFinal(idx) && !branch->IsWord(idx) && act->m_TailCallback == NULL)
              return FALSE;

          const int parent_idx = branch->GetParent(idx);
          if (parent_idx == 0)
          {
              // Nodes without a parent must lie within the first
              // 256 - FIRST_CHAR positions.
              if (idx > 256 - FIRST_CHAR)
                  return FALSE;
          }
          else if (branch->HasChild(parent_idx) || branch->IsFinal(parent_idx))
          {
              // The parent node may neither point to a child branch nor be final.
              return FALSE;
          }
      }

      return used_nodes == (int)branch->NumFilled() ? TRUE : FALSE;
  }
  #endif
  1195. #undef CHECK_INTEGRITY
  1196. #endif // SEARCH_ENGINE