Opera 12.15 Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ACTUtil.h 11KB


  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #ifndef ACTUTIL_H
  9. #define ACTUTIL_H
  10. #include "modules/search_engine/ACT.h"
  11. #include "modules/search_engine/BSCache.h"
  // Number of TrieNode slots in one branch; depends on FIRST_CHAR
  // (presumably provided by the search_engine headers included by this file
  // -- TODO confirm).
  #if FIRST_CHAR >= 2
  #define TRIE_SIZE 254
  #else
  #define TRIE_SIZE 306
  #endif
  //#define MAX_OFFSET_VALUE (TRIE_SIZE - 20)
  // Parenthesized so the macro expands safely inside larger expressions,
  // e.g. "2 * MAX_OFFSET_VALUE" or "TRIE_SIZE - MAX_OFFSET_VALUE".
  #define MAX_OFFSET_VALUE (TRIE_SIZE - 1)
  19. /*
  20. * record representing one character in ACT structure
  21. *
  22. * Invariants:
  23. *
  24. * 1) Free nodes
  25. * a) cannot be a word
  26. * b) cannot have a child
  27. * c) cannot be final
  28. * d) offset, id and parent are undefined
  29. *
  30. * 2) Leaf nodes
  31. * a) are final
  32. * b) cannot have a child
  33. * c) offset is undefined
  34. * d) if it is a word, id identifies the word
  35. * e) if it is not a word, tail compression must be in use and id identifies the word
  36. * f) parent is defined, or 0 for the root node
  37. *
  38. * 3) Internal nodes
  39. * a) are not final
  40. * b) must have a disk child, a memory child or an interleaved child determined by offset
  41. * c) if it is a word, id identifies the word
  42. * d) if it is not a word, id is undefined
  43. * e) parent is defined, or 0 for the root node
  44. */
  45. class TrieNode
  46. {
  47. private:
  48. friend class TrieBranch;
  49. enum Flags
  50. {
  51. Free = 1,
  52. DiskChild = 2,
  53. MemoryChild = 4,
  54. Child = DiskChild | MemoryChild,
  55. Word = 8,
  56. Final = 16
  57. };
  58. TrieNode()
  59. {
  60. mem_child = 0;
  61. flags_parent = Free << 11;
  62. id = 0;
  63. }
  64. int GetFlags() const
  65. {
  66. return flags_parent >> 11;
  67. }
  68. BOOL IsFree() const { return (GetFlags() & Free) != 0; }
  69. BOOL IsWord() const { return (GetFlags() & Word) != 0; }
  70. BOOL IsFinal() const { return (GetFlags() & Final) != 0; }
  71. BOOL HasChild() const { return (GetFlags() & Child) != 0; }
  72. BOOL HasDiskChild() const { return (GetFlags() & DiskChild) != 0; }
  73. BOOL HasMemoryChild() const { return (GetFlags() & MemoryChild) != 0; }
  74. void SetFlags(int f)
  75. {
  76. flags_parent = (UINT16)((flags_parent & 0x7FF) | (f << 11));
  77. OP_ASSERT(!(HasDiskChild() && HasMemoryChild()));
  78. OP_ASSERT(!IsFree() || (!HasChild() && !IsFinal() && !IsWord()));
  79. }
  80. int GetOffset() const
  81. {
  82. OP_ASSERT(!IsFree() && !IsFinal() && !HasChild());
  83. OP_ASSERT(offset + 255 - FIRST_CHAR > 0 && offset < TRIE_SIZE);
  84. return offset;
  85. }
  86. void SetOffset(int offset)
  87. {
  88. OP_ASSERT(offset + 255 - FIRST_CHAR > 0 && offset < TRIE_SIZE);
  89. this->offset = offset;
  90. }
  91. BSCache::Item::DiskId GetDiskChild() const
  92. {
  93. OP_ASSERT(!IsFree() && !IsFinal() && HasDiskChild());
  94. return disk_child;
  95. }
  96. void SetDiskChild(BSCache::Item::DiskId disk_child)
  97. {
  98. this->disk_child = disk_child;
  99. }
  100. TrieBranch *GetMemoryChild() const
  101. {
  102. OP_ASSERT(!IsFree() && !IsFinal() && HasMemoryChild());
  103. return mem_child;
  104. }
  105. void SetMemoryChild(TrieBranch *mem_child)
  106. {
  107. this->mem_child = mem_child;
  108. }
  109. int GetParent() const
  110. {
  111. return flags_parent & 0x7FF;
  112. }
  113. void SetParent(int p)
  114. {
  115. flags_parent = (UINT16)((flags_parent & 0xF800) | p);
  116. }
  117. ACT::WordID GetId() const
  118. {
  119. return id;
  120. }
  121. void SetId(ACT::WordID id)
  122. {
  123. this->id = id;
  124. }
  125. BOOL Equals(const TrieNode& n) const
  126. {
  127. return (flags_parent == n.flags_parent && id == n.id &&
  128. IsFree() ||
  129. ((!HasDiskChild() || disk_child == n.disk_child) &&
  130. (!HasMemoryChild() || mem_child == n.mem_child) &&
  131. (HasChild() || IsFinal() || offset == n.offset)));
  132. }
  133. void Pack(char *to);
  134. void Unpack(char *from);
  135. static int GetPackedSize() { return 10; }
  136. // 8B union int offset/int disk_child/* mem_child
  137. // 2B 5b flags, 11b parent
  138. // 4B word id
  139. union {
  140. BSCache::Item::DiskId disk_child;
  141. INT32 offset;
  142. TrieBranch *mem_child;
  143. };
  144. UINT16 flags_parent;
  145. ACT::WordID id;
  146. public:
  147. static int SwitchEndian(void *data, int size, void *user_arg);
  148. };
  149. /*
  150. * one branch in ACT structure, consists of TrieNode[TRIE_SIZE]
  151. */
  152. class TrieBranch : public BSCache::Item
  153. {
  154. public:
  155. TrieBranch(int id, TrieBranch *rbranch, int rnode, unsigned short nur);
  156. TrieBranch(OpFileLength id, unsigned short nur);
  157. UINT32 NumFilled() const { return (UINT32)data[0].GetId(); }
  158. CHECK_RESULT(virtual OP_STATUS Read(BlockStorage *storage));
  159. CHECK_RESULT(virtual OP_STATUS Write(BlockStorage *storage));
  160. CHECK_RESULT(virtual OP_STATUS Flush(BlockStorage *storage));
  161. virtual void OnIdChange(DiskId new_id, DiskId old_id);
  162. static int GetPackedSize() { return TRIE_SIZE * TrieNode::GetPackedSize(); }
  163. static void MoveNode(TrieBranch* dst, int to, TrieBranch* src, int from, BOOL freeSrc);
  164. #define AR OP_ASSERT(i >= 0 && i < TRIE_SIZE)
  165. BOOL IsFree (int i) const { AR; return data[i].IsFree(); }
  166. BOOL IsWord (int i) const { AR; return data[i].IsWord(); }
  167. BOOL IsFinal (int i) const { AR; return data[i].IsFinal(); }
  168. BOOL HasChild (int i) const { AR; return data[i].HasChild(); }
  169. BOOL HasDiskChild (int i) const { AR; return data[i].HasDiskChild(); }
  170. BOOL HasMemoryChild (int i) const { AR; return data[i].HasMemoryChild(); }
  171. int GetOffset (int i) const { AR; return data[i].GetOffset(); }
  172. DiskId GetDiskChild (int i) const { AR; return data[i].GetDiskChild(); }
  173. TrieBranch* GetMemoryChild (int i) const { AR; return data[i].GetMemoryChild(); }
  174. int GetParent (int i) const { AR; return data[i].GetParent(); }
  175. ACT::WordID GetId (int i) const { AR; return data[i].GetId(); }
  176. void SetFree (int i) { SetFlags(i, TrieNode::Free); }
  177. void SetFinal (int i) { SetFlags(i, TrieNode::Final); }
  178. void SetFinalWord (int i) { SetFlags(i, TrieNode::Final | TrieNode::Word); }
  179. void SetIsWord (int i) { SetFlags(i, (GetFlags(i) & ~TrieNode::Free) | TrieNode::Word); }
  180. void SetIsNotWord (int i) { SetFlags(i, GetFlags(i) & ~TrieNode::Word); }
  181. void SetOffset (int i, int offset);
  182. void SetDiskChild (int i, DiskId disk_child);
  183. void SetMemoryChild (int i, TrieBranch *mem_child);
  184. void SetParent (int i, int p);
  185. void SetId (int i, ACT::WordID id);
  186. #ifdef _DEBUG
  187. static BOOL CheckIntegrity(const TrieBranch *branch, ACT *act);
  188. #endif
  189. private:
  190. TrieBranch *parent_branch;
  191. int parent_pos;
  192. TrieNode data[TRIE_SIZE];
  193. void Pack();
  194. void Unpack();
  195. int GetFlags(int i) const { AR; return data[i].GetFlags(); }
  196. void SetFlags(int i, int f);
  197. void IncNumFilled()
  198. {
  199. OP_ASSERT(data[0].GetId() < TRIE_SIZE-1);
  200. data[0].SetId(data[0].GetId() + 1);
  201. }
  202. void DecNumFilled()
  203. {
  204. OP_ASSERT(data[0].GetId() > 0);
  205. data[0].SetId(data[0].GetId() - 1);
  206. }
  207. #undef AR
  208. };
  209. /*
  210. * keep track of all children of a given parent character
  211. */
  212. class Fitter : public NonCopyable
  213. {
  214. public:
  215. Fitter()
  216. {
  217. distances = NULL;
  218. size = 0;
  219. reserved_node = -1;
  220. src = NULL;
  221. parent = -1;
  222. }
  223. ~Fitter() {Reset();}
  224. void Reset()
  225. {
  226. if (distances != NULL)
  227. {
  228. OP_DELETEA(distances);
  229. distances = NULL;
  230. }
  231. }
  232. CHECK_RESULT(OP_STATUS Parse(TrieBranch *b, int parent));
  233. CHECK_RESULT(OP_STATUS ParseAll(TrieBranch *b));
  234. void AddNode(char node);
  235. int Fit(TrieBranch *b, int start);
  236. int GetSize()
  237. {
  238. if (distances == NULL)
  239. return 0;
  240. return size;
  241. }
  242. int GetOrigin()
  243. {
  244. return size > 0 ? distances[0] : 0;
  245. }
  246. int GetOffset(int pos)
  247. {
  248. return pos == 0 ? 0 : distances[pos];
  249. }
  250. int GetAOffset()
  251. {
  252. if (size <= 0)
  253. return 0;
  254. return reserved_node;
  255. }
  256. private:
  257. int *distances;
  258. int size;
  259. int reserved_node;
  260. TrieBranch *src;
  261. int parent;
  262. };
  263. /*
  264. * pointer to the given character in a branch,
  265. * loads/unloads the branches automatically as needed
  266. */
  267. class NodePointer
  268. {
  269. public:
  270. NodePointer(const NodePointer &copy)
  271. {
  272. BSCache::Item *cp = NULL;
  273. act = copy.act;
  274. if (copy.branch != NULL)
  275. {
  276. act->Load(&cp, copy.branch);
  277. branch = (TrieBranch *)cp;
  278. }
  279. else
  280. branch = NULL;
  281. offset = copy.offset;
  282. parent = copy.parent;
  283. }
  284. NodePointer(ACT *owner)
  285. {
  286. act = owner;
  287. branch = NULL;
  288. offset = -1;
  289. parent = -1;
  290. }
  291. ~NodePointer()
  292. {
  293. Reset();
  294. }
  295. NodePointer &operator=(const NodePointer &copy)
  296. {
  297. act = copy.act;
  298. Reset(copy.branch);
  299. offset = copy.offset;
  300. parent = copy.parent;
  301. return *this;
  302. }
  303. typedef BSCache::Item::DiskId DiskId;
  304. CHECK_RESULT(OP_STATUS Reset(DiskId block_no));
  305. void Reset(TrieBranch *b = NULL);
  306. BOOL ValidChar(char position);
  307. CHECK_RESULT(OP_STATUS Goto(char position));
  308. CHECK_RESULT(OP_STATUS NewNode(int position));
  309. int GetChildrenSize(int node_parent = 0);
  310. BOOL Reposition(int node_parent = 0, char next_char = 0);
  311. BOOL Merge(int branch_parent);
  312. CHECK_RESULT(OP_STATUS Move(int move_parent));
  313. CHECK_RESULT(OP_STATUS MoveChildren(NodePointer src, NodePointer dst, unsigned char next_char, BOOL *parent_moved));
  314. CHECK_RESULT(static OP_STATUS GetSubTree(char **result, int *count, NodePointer t, int max));
  315. CHECK_RESULT(static OP_STATUS GetSubTree(ACT::WordID *result, int *count, NodePointer t, int max));
  316. CHECK_RESULT(static OP_STATUS GetFirstEntry(ACT::PrefixResult &result, NodePointer t));
  317. CHECK_RESULT(static OP_BOOLEAN GetNextEntry(ACT::PrefixResult &result, NodePointer t, const char *prev_str));
  318. BOOL IsFree () const { return branch->IsFree(offset); }
  319. BOOL IsWord () const { return branch->IsWord(offset); }
  320. BOOL IsFinal () const { return branch->IsFinal(offset); }
  321. BOOL HasChild () const { return branch->HasChild(offset); }
  322. BOOL HasDiskChild () const { return branch->HasDiskChild(offset); }
  323. BOOL HasMemoryChild () const { return branch->HasMemoryChild(offset); }
  324. int GetOffset () const { return branch->GetOffset(offset); }
  325. DiskId GetDiskChild () const { return branch->GetDiskChild(offset); }
  326. TrieBranch* GetMemoryChild () const { return branch->GetMemoryChild(offset); }
  327. int GetParent () const { return branch->GetParent(offset); }
  328. ACT::WordID GetId () const { return branch->GetId(offset); }
  329. void SetFree () { branch->SetFree(offset); }
  330. void SetFinal () { branch->SetFinal(offset); }
  331. void SetFinalWord () { branch->SetFinalWord(offset); }
  332. void SetIsWord () { branch->SetIsWord(offset); }
  333. void SetIsNotWord () { branch->SetIsNotWord(offset); }
  334. void SetOffset (int o) { branch->SetOffset(offset, o); }
  335. void SetDiskChild (DiskId dc) { branch->SetDiskChild(offset, dc); }
  336. void SetMemoryChild (TrieBranch *mc) { branch->SetMemoryChild(offset, mc); }
  337. void SetParent (int p) { branch->SetParent(offset, p); }
  338. void SetId (ACT::WordID id) { branch->SetId(offset, id); }
  339. CHECK_RESULT(OP_STATUS Flush(TrieBranch *branch));
  340. int GetSuperParent(int o)
  341. {
  342. register int p;
  343. while ((p = branch->GetParent(o)) != 0)
  344. o = p;
  345. return o;
  346. }
  347. int GetCurrentParent()
  348. {
  349. return parent;
  350. }
  351. int GetCurrentOffset()
  352. {
  353. return offset;
  354. }
  355. void SetCurrentOffset(int o)
  356. {
  357. offset = o;
  358. }
  359. DiskId GetCurrentBranch()
  360. {
  361. return branch->disk_id;
  362. }
  363. TrieBranch *GetCurrentPointer()
  364. {
  365. return branch;
  366. }
  367. private:
  368. ACT *act;
  369. TrieBranch *branch;
  370. int offset;
  371. int parent;
  372. };
  373. #endif // ACTUTIL_H