Opera 12.15 Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

BTreeBase.h 13KB


  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #ifndef BTREEBASE_H
  9. #define BTREEBASE_H
  10. #include "modules/search_engine/TypeDescriptor.h"
  11. #include "modules/search_engine/BSCache.h"
  12. #include "modules/search_engine/ResultBase.h"
  13. class PoolBase;
  14. /**
  15. * @brief general type-less B-tree implementation as a base for a thin template TBTree
  16. * @author Pavel Studeny <pavels@opera.com>
  17. */
  18. class BTreeBase
  19. {
  20. public:
  21. /**
  22. * Used internally by BTree.
  23. * One branch of the BTree containing at maximum
  24. * SIZE KEYs and SIZE + 1 POINTERs to child branches.
  25. *
  26. @verbatim
  27. KEY1 KEY2 ... KEYmax
  28. / | | | \
  29. @endverbatim
  30. */
  31. struct BTreeBranch : public BSCache::Item
  32. {
  33. BTreeBase *owner;
  34. int size;
  35. BTreeBranch *parent;
  36. PoolBase *cache;
  37. char *data;
  38. BTreeBranch(PoolBase *cache, OpFileLength id, BTreeBranch *rbranch, unsigned short nur);
  39. virtual ~BTreeBranch();
  40. CHECK_RESULT(OP_STATUS Construct(BlockStorage *storage));
  41. CHECK_RESULT(virtual OP_STATUS Read(BlockStorage *storage));
  42. CHECK_RESULT(virtual OP_STATUS Flush(BlockStorage *storage));
  43. virtual void OnIdChange(DiskId new_id, DiskId old_id);
  44. #ifdef ESTIMATE_MEMORY_USED_AVAILABLE
  45. virtual size_t EstimateMemoryUsed() const;
  46. #endif
  47. void CopyData(const BTreeBranch* src);
  48. inline char* Data() { return data; }
  49. inline const char* Data() const { return data; }
  50. inline int DataSize() const { return sizeof(DiskId) + PtrItemSize() * size; }
  51. inline char* PtrItem(int i) { return data + sizeof(DiskId) + PtrItemSize() * i; }
  52. inline const char* PtrItem(int i) const { return data + sizeof(DiskId) + PtrItemSize() * i; }
  53. inline int PtrItemSize() const { return sizeof(DiskId) + ItemSize(); }
  54. inline char* GetItem(int i) { return PtrItem(i) + sizeof(DiskId); }
  55. inline const char* GetItem(int i) const { return PtrItem(i) + sizeof(DiskId); }
  56. inline int ItemSize() const ;
  57. inline DiskId& Pointer(int i) { return *reinterpret_cast< DiskId *>(PtrItem(i)); }
  58. inline DiskId Pointer(int i) const { return *reinterpret_cast<const DiskId *>(PtrItem(i)); }
  59. inline DiskId& Rightmost() { return *reinterpret_cast< DiskId *>(data); }
  60. inline DiskId Rightmost() const { return *reinterpret_cast<const DiskId *>(data); }
  61. protected:
  62. friend class PoolBase;
  63. };
  64. /**
  65. * delete all data; if KEY is a pointer, doesn't delete the pointer itself
  66. */
  67. virtual ~BTreeBase(void);
  68. /**
  69. * insert a single item into the BTree
  70. * @param item data to insert
  71. * @param overwrite_existing makes sense only if KEY contains some data not included in comparison
  72. */
  73. CHECK_RESULT(OP_STATUS Insert(const void *item, BOOL overwrite_existing = FALSE));
  74. /**
  75. * delete a single item from the BTree
  76. * @param item data to delete
  77. * @return OpBoolean::IS_FALSE if the item wasn't found
  78. */
  79. CHECK_RESULT(OP_BOOLEAN Delete(const void *item));
  80. /**
  81. * delete a range of items
  82. * @param first item to delete from
  83. * @param o1 operatorGE: delete including first, operatorGT: delete after first, other: delete from the beginning
  84. * @param last item to delete until
  85. * @param o2 operatorLE: delete including last, operatorLT: delete before last, other: delete until the end
  86. */
  87. CHECK_RESULT(OP_STATUS Delete(const void *first, SearchOperator o1, const void *last, SearchOperator o2));
  88. /**
  89. * search for the given item
  90. * @param item contains an item to search for on input and the found item on output, if found
  91. * @return OpBoolean::IS_TRUE if the item was found; OpStatus::OK if the item wasn't found - item contains the next item found; OpBoolean::IS_FALSE if BTree is empty or item wasn't found and there is no next item
  92. */
  93. CHECK_RESULT(OP_BOOLEAN Search(void *item));
  94. /**
  95. * look up the first item
  96. * @param item contains the found item on output, if found
  97. * @return OpBoolean::IS_TRUE if the item was found, OpBoolean::IS_FALSE if BTree is empty
  98. */
  99. CHECK_RESULT(OP_BOOLEAN GetFirst(void *item));
  100. /**
  101. * look up the last item
  102. * @param item contains the found item on output, if found
  103. * @return OpBoolean::IS_TRUE if the item was found, OpBoolean::IS_FALSE if BTree is empty
  104. */
  105. CHECK_RESULT(OP_BOOLEAN GetLast(void *item));
  106. /**
  107. * find all elements for which (element oper item) is TRUE
  108. * @param item a key to compare
  109. * @param oper operator to perform, one of operatorLT, operatorLE, operatorEQ, operatorGE, operatorGT
  110. * @return NULL on error
  111. */
  112. IteratorBase *Search(const void *item, SearchOperator oper);
  113. /**
  114. * @return all items in the B-tree beginning with the first one
  115. */
  116. IteratorBase *SearchFirst(void);
  117. /**
  118. * @return all items in the B-tree beginning with the last one
  119. */
  120. IteratorBase *SearchLast(void);
  121. /**
  122. * @return YES if the B-tree is empty, NO if it contains at least 1 item and MAYBE on error
  123. */
  124. BOOL3 Empty(void);
  125. /**
  126. * @return position of the root branch in the PoolBase
  127. */
  128. BSCache::Item::DiskId GetId(void);
  129. /**
  130. * reset this instance to another root when transaction is being aborted
  131. * @param new_root 0 to invalidate the written data
  132. */
  133. void Renew(BSCache::Item::DiskId new_root = 0)
  134. {
  135. this->m_root = new_root;
  136. }
  137. /**
  138. * empty the BTree
  139. */
  140. CHECK_RESULT(OP_STATUS Clear(void));
  141. /**
  142. * @return an estimate of the memory used by this data structure
  143. */
  144. #ifdef ESTIMATE_MEMORY_USED_AVAILABLE
  145. size_t EstimateMemoryUsed() const;
  146. #endif
  147. /**
  148. * @brief methods to be used by TBTree::TResult
  149. */
  150. class Result
  151. {
  152. public:
  153. CHECK_RESULT(OP_STATUS Init(BTreeBase *owner, BTreeBranch *current, int pos,
  154. SearchOperator oper, const void *key));
  155. protected:
  156. BOOL _Next(void);
  157. BOOL _Prev(void);
  158. int _Count(void) const;
  159. BOOL _End(void) const;
  160. BOOL _Beginning(void) const;
  161. const void *_Get(void);
  162. void Free(void);
  163. OP_STATUS m_last_error;
  164. BTreeBase *m_btree;
  165. BTreeBranch *m_current;
  166. int m_pos;
  167. SearchOperator m_oper;
  168. char *m_key_buf;
  169. LoopDetector<BSCache::Item::DiskId> m_loop_detector;
  170. };
  171. friend class Result;
  172. friend class PoolBase;
  173. friend struct BTreeBranch;
  174. protected:
  175. /**
  176. * create an empty BTree
  177. */
  178. BTreeBase(const TypeDescriptor &allocator, PoolBase *cache, BSCache::Item::DiskId root);
  179. /**
  180. * create a new result for the given key and operator
  181. */
  182. virtual IteratorBase *NewResult(BTreeBranch *current, int pos, SearchOperator oper, const void *key) {return NULL;}
  183. /**
  184. * create a new branch and increase its reference counter
  185. * @param branch created branch
  186. * @param parent parent of the new branch or 0 for root
  187. */
  188. CHECK_RESULT(virtual OP_STATUS NewBranch(BTreeBranch **branch, BTreeBranch *parent));
  189. /**
  190. * delete the branch and all its subbranches
  191. */
  192. CHECK_RESULT(OP_STATUS DeleteBranch(BSCache::Item::DiskId branch));
  193. /**
  194. * erase the branch and decrase its reference counter
  195. */
  196. CHECK_RESULT(OP_STATUS DeleteLeaf(BTreeBranch *leaf));
  197. /**
  198. * adjust root branch if empty
  199. */
  200. CHECK_RESULT(OP_STATUS RebalanceRoot(void));
  201. /**
  202. * change parent in case of splitting one branch
  203. */
  204. void SetParent(BSCache::Item::DiskId branch, BTreeBranch *parent);
  205. /**
  206. * load a new branch instead of the previous one
  207. * @param rv loaded branch
  208. * @param branch new branch to go
  209. * @param previous a branch to release together with its parents or NULL if there is no such branch
  210. */
  211. CHECK_RESULT(OP_STATUS Goto(BTreeBase::BTreeBranch **rv, BSCache::Item::DiskId branch, BTreeBase::BTreeBranch *previous));
  212. /**
  213. * load a new branch instead of the previous one
  214. * @param branch new branch to go or NULL to just release the previous
  215. * @param previous a branch to release together with its parents or NULL if there is no such branch
  216. */
  217. BTreeBranch *GotoB(BTreeBranch *branch, BTreeBranch *previous);
  218. /**
  219. * set the current branch as modified; newly created branch is always modified
  220. */
  221. CHECK_RESULT(OP_STATUS Modify(BTreeBranch *branch));
  222. /**
  223. * @return POINTER which is guaranteed not to change
  224. */
  225. BSCache::Item::DiskId SafePointer(BTreeBranch *branch);
  226. /**
  227. * @return index of entry in branch >= item
  228. */
  229. // int BinarySearch(BTreeBranch *branch, const void *item);
  230. /**
  231. * search for the item in the whole BTree
  232. * @param rbranch branch containing the KEY or where to insert it
  233. * @param rpos position of the KEY in rbranch or position where to insert it
  234. * @param item KEY to search
  235. * @return IS_TRUE if found
  236. */
  237. CHECK_RESULT(OP_BOOLEAN SearchItem(BTreeBranch **rbranch, int *rpos, const void *item));
  238. /**
  239. * find the least item greater than the given one
  240. * @return IS_FALSE on eond of data
  241. */
  242. CHECK_RESULT(OP_BOOLEAN SearchNextItem(BTreeBranch **rbranch, int *rpos, const void *item));
  243. /**
  244. * insert the KEY to the given branch and postion, doesn't release the branch
  245. * @param branch relese the branch after return from this method by Goto(0, branch)
  246. * @param pos position returned by SearchItem or BinarySearch
  247. * @param item KEY to insert
  248. * @param overflow new branch in case that insertion caused a full branch to overflow
  249. */
  250. CHECK_RESULT(OP_STATUS InsertItem(BTreeBranch *branch, int pos, const void *item, BSCache::Item::DiskId overflow = 0));
  251. /**
  252. * delete item from any branch
  253. * @param branch branch containing the KEY to delete
  254. * @param pos position of the KEY in the branch
  255. */
  256. CHECK_RESULT(OP_STATUS DeleteItem(BTreeBranch *branch, int pos));
  257. /**
  258. * delete item from a leaf or from a parent of merged branch - the KEY's POINTER is ignored
  259. * @param branch branch containing the KEY to delete
  260. * @param pos position of the KEY in the branch
  261. */
  262. CHECK_RESULT(OP_STATUS RemoveItem(BTreeBranch *branch, int pos));
  263. /**
  264. * balance the number of items between two neighbor branches
  265. * @param l_child left branch
  266. * @param parent parent branch
  267. * @param g_child reight branch
  268. */
  269. void Rotate(BTreeBranch *l_child, BTreeBranch *parent, BTreeBranch *g_child);
  270. /**
  271. * join two branches into one, the non-destination child is deleted
  272. * @param dst destination branch, it's supposed to be l_child or g_child
  273. * @param l_child left branch
  274. * @param parent parent branch
  275. * @param g_child reight branch
  276. */
  277. CHECK_RESULT(OP_STATUS Merge(BTreeBranch *dst, BTreeBranch *l_child, BTreeBranch *parent, BTreeBranch *g_child));
  278. /**
  279. * @return TRUE if given branch has no children
  280. */
  281. BOOL IsLeaf(BTreeBranch *branch) const {return branch->Rightmost() == 0;}
  282. /**
  283. * @return TRUE if given branch has no parent
  284. */
  285. BOOL IsRoot(BTreeBranch *branch) const {return branch->parent == NULL;}
  286. inline int BTreeSize() const;
  287. #ifdef _DEBUG
  288. inline BOOL CheckIntegrity(const BTreeBranch *branch) const;
  289. #endif
  290. TypeDescriptor m_allocator;
  291. BSCache::Item::DiskId m_root;
  292. PoolBase *m_cache;
  293. OpFileLength m_reserved_id;
  294. };
  295. /**
  296. * @brief cache holding several B-trees in a file
  297. * @author Pavel Studeny <pavels@opera.com>
  298. */
  299. class PoolBase : public BSCache
  300. {
  301. public:
  302. PoolBase(int max_cache_branches, const TypeDescriptor &allocator);
  303. /**
  304. * PoolBase must be opened before you call any other method
  305. * @param path file storing the data; file is always created if it doesn't exist
  306. * @param mode Read/ReadWrite mode
  307. * @param blocksize one block consists of 12 B of internal BlockStorage data, 4 B rightmost pointer and the rest is divided into (sizeof(data) + 4 B pointer) chunks
  308. * @param folder might be one of predefind folders
  309. */
  310. CHECK_RESULT(OP_STATUS Open(const uni_char* path, BlockStorage::OpenMode mode, int blocksize = 512, OpFileFolder folder = OPFILE_ABSOLUTE_FOLDER));
  311. /**
  312. * flush all unsaved data, commit any pending transaction and close the file
  313. */
  314. CHECK_RESULT(OP_STATUS Close(void));
  315. /**
  316. * abort all write operations since the last Commit
  317. */
  318. CHECK_RESULT(OP_STATUS Abort(void));
  319. /**
  320. * write all modified data to disk
  321. */
  322. CHECK_RESULT(OP_STATUS Commit(void));
  323. /**
  324. * @return an estimate of the memory used by this data structure
  325. */
  326. #ifdef ESTIMATE_MEMORY_USED_AVAILABLE
  327. virtual size_t EstimateMemoryUsed() const;
  328. #endif
  329. friend class BTreeBase;
  330. friend struct BTreeBase::BTreeBranch;
  331. protected:
  332. virtual BSCache::Item *NewMemoryItem(int id, Item *rbranch, int rnode, unsigned short nur);
  333. virtual BSCache::Item *NewDiskItem(OpFileLength id, unsigned short nur);
  334. BTreeBase::BTreeBranch *LoadFromCache(BSCache::Item::DiskId id);
  335. TypeDescriptor m_allocator;
  336. public:
  337. /**
  338. * @param root If the file holds only one BTree (e.g. a SingleBTree), you should specify
  339. * the root node (typically == 2) to enable more consistency checks. Otherwise use 0.
  340. * @param thorough If TRUE, the BTree will be checked in depth for recursive sorting errors
  341. * @return OpBoolean::IS_TRUE if the BTree is consistent
  342. */
  343. CHECK_RESULT(OP_BOOLEAN CheckConsistency(UINT32 root = 0, BOOL thorough = TRUE));
  344. #if defined(LINGOGI) && (defined(_DEBUG) || defined(SELFTEST) || defined(SEARCH_ENGINE_LOG))
  345. CHECK_RESULT(OP_BOOLEAN PrintBTree(UINT32 root = 0));
  346. #endif
  347. };
  348. inline int BTreeBase::BTreeSize() const
  349. {
  350. return (int)((m_cache->m_storage.GetBlockSize() - 12 - sizeof(BSCache::Item::DiskId)) / (m_cache->m_allocator.size + sizeof(BSCache::Item::DiskId)));
  351. }
  352. inline int BTreeBase::BTreeBranch::ItemSize() const
  353. {
  354. return (int)(cache->m_allocator.size_align32);
  355. }
  356. #endif // BTREEBASE_H