Opera 12.15 Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

StringTable.cpp 37KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439
  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #include "core/pch.h"
  9. #if defined SEARCH_ENGINE && (defined SEARCH_ENGINE_FOR_MAIL || defined SELFTEST)
  10. #include "modules/search_engine/StringTable.h"
  11. #include "modules/pi/OpSystemInfo.h"
  12. #include "modules/search_engine/ACTUtil.h"
  13. #include "modules/search_engine/WordSegmenter.h"
  14. #include "modules/util/opautoptr.h"
  15. #if !defined ADVANCED_OPINT32VECTOR && defined ADVANCED_OPVECTOR
  16. #define Subtract Substract
  17. #endif
  18. // number of items in cache which trigger PreFlush/Flush to write all available data immediately
  19. #define IGNORE_TIMEOUT_LIMIT 15000
  20. #define ACT_EXTENSION ".axx"
  21. #define ACT_EXTENSION_OLD ".ax"
  22. #define LEX_EXTENSION ".bx"
  23. CHECK_RESULT(static OP_STATUS ConcatPaths(OpString& dest, const uni_char *directory, const uni_char *filename));
  24. static OP_STATUS ConcatPaths(OpString& dest, const uni_char *directory, const uni_char *filename)
  25. {
  26. RETURN_OOM_IF_NULL(dest.Reserve((int)(uni_strlen(directory) + 1 + uni_strlen(filename) + 1)));
  27. RETURN_IF_ERROR(dest.Set(directory));
  28. if (dest.HasContent())
  29. dest.Append(PATHSEP);
  30. return dest.Append(filename);
  31. }
  32. static int act_strlen(const uni_char *w)
  33. {
  34. int len = 0;
  35. ACT::SkipNonPrintableChars(w);
  36. while (*w != 0)
  37. {
  38. ++len;
  39. ++w;
  40. ACT::SkipNonPrintableChars(w);
  41. }
  42. return len;
  43. }
  44. static int act_strcmp(const uni_char *w1, const uni_char *w2)
  45. {
  46. ACT::SkipNonPrintableChars(w1);
  47. ACT::SkipNonPrintableChars(w2);
  48. while (*w1 == *w2 && *w2 != 0)
  49. {
  50. ++w1;
  51. ++w2;
  52. ACT::SkipNonPrintableChars(w1);
  53. ACT::SkipNonPrintableChars(w2);
  54. }
  55. return (int)*w1 - (int)*w2;
  56. }
  57. static int act_strncmp(const uni_char *w1, const uni_char *w2, int max_len)
  58. {
  59. if (max_len <= 0)
  60. return 0;
  61. ACT::SkipNonPrintableChars(w1);
  62. ACT::SkipNonPrintableChars(w2);
  63. while (--max_len > 0 && *w1 == *w2 && *w2 != 0)
  64. {
  65. ++w1;
  66. ++w2;
  67. ACT::SkipNonPrintableChars(w1);
  68. ACT::SkipNonPrintableChars(w2);
  69. }
  70. return (int)*w1 - (int)*w2;
  71. }
  72. static int SwitchINT32Endian(void *data, int size, void *user_arg)
  73. {
  74. BlockStorage::SwitchEndian(data, 4);
  75. return 4;
  76. }
  77. OP_BOOLEAN StringTable::Open(const uni_char *path, const uni_char *table_name, int flags)
  78. {
  79. OpString act_path, act_old_path, lex_path;
  80. OpString filename;
  81. OP_STATUS err;
  82. BOOL exist;
  83. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  84. OpString journalname;
  85. RETURN_IF_ERROR(filename.SetConcat(table_name, UNI_L(".log")));
  86. RETURN_IF_ERROR(ConcatPaths(journalname, OpStringC(path), filename));
  87. if (m_log != NULL)
  88. OP_DELETE(m_log);
  89. SearchEngineLog::ShiftLogFile(10, journalname);
  90. if ((m_log = SearchEngineLog::CreateLog(SearchEngineLog::File, journalname.CStr())) == NULL)
  91. return OpStatus::ERR;
  92. #endif
  93. RETURN_IF_ERROR(filename.SetConcat(table_name, UNI_L(ACT_EXTENSION)));
  94. RETURN_IF_ERROR(ConcatPaths(act_path, path, filename.CStr()));
  95. RETURN_IF_ERROR(filename.SetConcat(table_name, UNI_L(ACT_EXTENSION_OLD)));
  96. RETURN_IF_ERROR(ConcatPaths(act_old_path, path, filename.CStr()));
  97. if (BlockStorage::FileExists(act_path.CStr()) == OpBoolean::IS_FALSE &&
  98. BlockStorage::FileExists(act_old_path.CStr()) == OpBoolean::IS_TRUE)
  99. RETURN_IF_ERROR(BlockStorage::RenameStorage(act_old_path.CStr(), act_path.CStr()));
  100. RETURN_IF_ERROR(filename.SetConcat(table_name, UNI_L(LEX_EXTENSION)));
  101. RETURN_IF_ERROR(ConcatPaths(lex_path, path, filename.CStr()));
  102. exist = BlockStorage::FileExists(lex_path.CStr()) == OpBoolean::IS_TRUE && BlockStorage::FileExists(act_path.CStr()) == OpBoolean::IS_TRUE;
  103. // We can't have one without the other:
  104. if (!exist && BlockStorage::FileExists(lex_path.CStr()) == OpBoolean::IS_TRUE)
  105. RETURN_IF_ERROR(BlockStorage::DeleteFile(lex_path.CStr()));
  106. if (!exist && BlockStorage::FileExists(act_path.CStr()) == OpBoolean::IS_TRUE)
  107. RETURN_IF_ERROR(BlockStorage::DeleteFile(act_path.CStr()));
  108. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  109. if (BlockStorage::FileExists(act_path) == OpBoolean::IS_TRUE)
  110. {
  111. m_log->WriteFile(SearchEngineLog::Debug, "ax", act_path);
  112. RETURN_IF_ERROR(journalname.SetConcat(act_path, UNI_L("-j")));
  113. if (BlockStorage::FileExists(journalname) == OpBoolean::IS_TRUE)
  114. m_log->WriteFile(SearchEngineLog::Debug, "ax-j", journalname);
  115. RETURN_IF_ERROR(journalname.SetConcat(act_path, UNI_L("-g")));
  116. if (BlockStorage::FileExists(journalname) == OpBoolean::IS_TRUE)
  117. m_log->WriteFile(SearchEngineLog::Debug, "ax-g", journalname);
  118. }
  119. if (BlockStorage::FileExists(lex_path) == OpBoolean::IS_TRUE)
  120. {
  121. m_log->WriteFile(SearchEngineLog::Debug, "bx", lex_path);
  122. RETURN_IF_ERROR(journalname.SetConcat(lex_path, UNI_L("-j")));
  123. if (BlockStorage::FileExists(journalname) == OpBoolean::IS_TRUE)
  124. m_log->WriteFile(SearchEngineLog::Debug, "bx-j", journalname);
  125. RETURN_IF_ERROR(journalname.SetConcat(lex_path, UNI_L("-g")));
  126. if (BlockStorage::FileExists(journalname) == OpBoolean::IS_TRUE)
  127. m_log->WriteFile(SearchEngineLog::Debug, "bx-g", journalname);
  128. }
  129. m_log->Write(SearchEngineLog::Debug, "Open", UNI_L("%i %s %s"), flags, table_name, path);
  130. #endif
  131. if ((err = m_act.Open(act_path.CStr(),
  132. (flags & StringTable::ReadOnly) != 0 ? BlockStorage::OpenRead : BlockStorage::OpenReadWrite)) == OpStatus::ERR_PARSING_FAILED &&
  133. (flags & StringTable::OverwriteCorrupted) != 0)
  134. {
  135. exist = FALSE;
  136. RETURN_IF_ERROR(BlockStorage::DeleteFile(act_path.CStr()));
  137. RETURN_IF_ERROR(BlockStorage::DeleteFile(lex_path.CStr()));
  138. err = m_act.Open(act_path.CStr(), (flags & StringTable::ReadOnly) != 0 ? BlockStorage::OpenRead : BlockStorage::OpenReadWrite);
  139. }
  140. RETURN_IF_ERROR(err);
  141. if ((err = m_wordbag.Open(lex_path.CStr(),
  142. (flags & StringTable::ReadOnly) != 0 ? BlockStorage::OpenRead : BlockStorage::OpenReadWrite)) == OpStatus::ERR_PARSING_FAILED &&
  143. (flags & StringTable::OverwriteCorrupted) != 0)
  144. {
  145. exist = FALSE;
  146. OpStatus::Ignore(m_act.Close());
  147. RETURN_IF_ERROR(BlockStorage::DeleteFile(act_path.CStr()));
  148. RETURN_IF_ERROR(BlockStorage::DeleteFile(lex_path.CStr()));
  149. RETURN_IF_ERROR(m_act.Open(act_path.CStr(), (flags & StringTable::ReadOnly) != 0 ? BlockStorage::OpenRead : BlockStorage::OpenReadWrite));
  150. if (OpStatus::IsError(err = m_wordbag.Open(lex_path.CStr(),
  151. (flags & StringTable::ReadOnly) != 0 ? BlockStorage::OpenRead : BlockStorage::OpenReadWrite)))
  152. err = m_act.Clear();
  153. }
  154. if (OpStatus::IsError(err))
  155. {
  156. OpStatus::Ignore(m_act.Close());
  157. return err;
  158. }
  159. if (!m_wordbag.GetStorage()->IsNativeEndian())
  160. m_wordbag.GetStorage()->SetOnTheFlyCnvFunc(&SwitchINT32Endian);
  161. m_flags = (flags & OpenFlagMask) | CachesSorted | CachesMerged; // empty caches are sorted and merged
  162. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  163. if (exist && (err = CheckConsistency()) != OpBoolean::IS_TRUE)
  164. m_log->Write(SearchEngineLog::Emerg, "inconsistent index", "%i", err);
  165. #endif
  166. //#if defined(SELFTEST) || defined(_DEBUG)
  167. // if (exist)
  168. // {
  169. // OP_ASSERT(CheckConsistency() == OpBoolean::IS_TRUE);
  170. // }
  171. //#endif
  172. return exist ? OpBoolean::IS_TRUE : OpBoolean::IS_FALSE;
  173. }
  174. OP_STATUS StringTable::Close(BOOL force_close)
  175. {
  176. OP_STATUS err;
  177. OpString wb_name;
  178. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  179. if (m_log != NULL)
  180. {
  181. m_log->Write(SearchEngineLog::Debug, "Close", "");
  182. OP_DELETE(m_log);
  183. m_log = NULL;
  184. }
  185. #endif
  186. if (!m_wordbag.GetStorage()->IsOpen())
  187. return OpStatus::ERR_BAD_FILE_NUMBER;
  188. if (OpStatus::IsError(err = Commit()) ||
  189. (!CacheEmpty() && OpStatus::IsError(err = Commit())))
  190. {
  191. if (!force_close)
  192. return err;
  193. m_act.Abort();
  194. OpStatus::Ignore(m_act.Close());
  195. OpStatus::Ignore(m_wordbag.Abort());
  196. OpStatus::Ignore(m_wordbag.Close());
  197. m_word_cache.Clear();
  198. m_deleted_cache.Clear();
  199. return err;
  200. }
  201. if (OpStatus::IsError(err = wb_name.Set(m_wordbag.GetStorage()->GetFullName())))
  202. {
  203. if (!force_close)
  204. return err;
  205. }
  206. if (OpStatus::IsError(err = m_wordbag.Close()))
  207. {
  208. if (!force_close)
  209. return err;
  210. m_act.Abort();
  211. OpStatus::Ignore(m_act.Close());
  212. OpStatus::Ignore(m_wordbag.Abort());
  213. OpStatus::Ignore(m_wordbag.Close());
  214. }
  215. if (OpStatus::IsError(err = m_act.Close()))
  216. {
  217. if (!force_close)
  218. {
  219. OpStatus::Ignore(m_wordbag.Open(wb_name.CStr(), BlockStorage::OpenReadWrite));
  220. return err;
  221. }
  222. m_act.Abort();
  223. OpStatus::Ignore(m_act.Close());
  224. }
  225. return err;
  226. }
  227. OP_STATUS StringTable::Clear()
  228. {
  229. int i;
  230. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  231. if (m_log != NULL)
  232. m_log->Write(SearchEngineLog::Debug, "Clear", "");
  233. #endif
  234. for (i = m_word_cache.GetCount() - 1; i >= 0; i--)
  235. {
  236. FileWord* word = m_word_cache.Remove(i);
  237. OP_DELETE(word);
  238. }
  239. for (i = m_deleted_cache.GetCount() - 1; i >= 0; i--)
  240. {
  241. FileWord* word = m_deleted_cache.Remove(i);
  242. OP_DELETE(word);
  243. }
  244. m_flags |= CachesSorted | CachesMerged;
  245. m_act.Abort();
  246. RETURN_IF_ERROR(m_act.Clear());
  247. RETURN_IF_ERROR(m_wordbag.Abort());
  248. OpStatus::Ignore(m_wordbag.Flush(BSCache::ReleaseAll));
  249. return m_wordbag.GetStorage()->Clear();
  250. }
  251. class TVectorIterator : public SearchIterator<INT32>
  252. {
  253. public:
  254. TVectorIterator(TVector<INT32> *vector, BOOL take_ownership = FALSE)
  255. : m_vector(vector)
  256. , m_pos(0)
  257. , m_take_ownership(take_ownership) {}
  258. ~TVectorIterator() { if (m_take_ownership) OP_DELETE(m_vector); }
  259. virtual BOOL Next(void) {return ++m_pos < (int)m_vector->GetCount();}
  260. virtual BOOL Prev(void) {return --m_pos >= 0;}
  261. virtual const INT32 &Get(void) {return m_vector->Get(m_pos);}
  262. virtual OP_STATUS Error(void) const {return OpStatus::OK;}
  263. virtual int Count(void) const {return m_vector->GetCount();}
  264. virtual BOOL End(void) const {return (unsigned)m_pos >= m_vector->GetCount();}
  265. virtual BOOL Beginning(void) const {return m_pos < 0;}
  266. protected:
  267. TVector<INT32> *m_vector;
  268. int m_pos;
  269. BOOL m_take_ownership;
  270. };
  271. OP_BOOLEAN StringTable::PreFlush(int max_ms)
  272. {
  273. int i, j;
  274. OP_STATUS err;
  275. double time_limit;
  276. FileWord *preflush, *del_preflush, *cache_word;
  277. OP_PROBE4(OP_PROBE_SEARCH_ENGINE_STRINGTABLE_PREFLUSH);
  278. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  279. if (m_log != NULL)
  280. m_log->Write(SearchEngineLog::Debug, "PreFlush", "%i", max_ms);
  281. #endif
  282. // ignore timeout if there is too many items in cache
  283. if (m_word_cache.GetCount() + m_deleted_cache.GetCount() > IGNORE_TIMEOUT_LIMIT)
  284. max_ms = 0;
  285. if ((m_flags & PreFlushing) == 0 && CacheEmpty())
  286. return OpBoolean::IS_TRUE;
  287. time_limit = g_op_time_info->GetWallClockMS() + max_ms;
  288. // prepare caches
  289. if ((m_flags & PreFlushing) == 0)
  290. {
  291. RETURN_IF_ERROR(SortCaches());
  292. MergeCaches();
  293. RETURN_IF_ERROR(m_word_backup.CopyFrom(m_word_cache));
  294. if (OpStatus::IsError(err = m_deleted_backup.CopyFrom(m_deleted_cache)))
  295. {
  296. m_word_backup.Clear();
  297. return err;
  298. }
  299. m_word_preflush.MoveFrom(m_word_cache);
  300. m_deleted_preflush.MoveFrom(m_deleted_cache);
  301. m_word_pos = m_word_preflush.GetCount();
  302. m_deleted_pos = m_deleted_preflush.GetCount();
  303. m_flags |= PreFlushing;
  304. }
  305. m_act.SetNURFlush((m_flags & UseNUR) != 0);
  306. m_wordbag.SetNURFlush((m_flags & UseNUR) != 0);
  307. // insert (and delete in those affected by the insertion)
  308. while (--m_word_pos >= 0)
  309. {
  310. preflush = m_word_preflush[m_word_pos];
  311. TVectorIterator it(preflush->file_ids);
  312. OP_ASSERT(preflush->file_ids->GetCount() > 0);
  313. j = m_deleted_preflush.BinarySearch(preflush->word);
  314. if (j >= m_deleted_preflush.GetCount() || act_strcmp(m_deleted_preflush[j]->word, preflush->word) != 0)
  315. j = -1;
  316. if (preflush->btree == NULL && OpStatus::IsError(err = SearchForBTree(preflush)))
  317. return AbortPreFlush(err);
  318. if (OpStatus::IsError(err = preflush->btree->Insert(&it))) // add the file ids
  319. return AbortPreFlush(err);
  320. if (j >= 0) // delete the file ids
  321. {
  322. del_preflush = m_deleted_preflush[j];
  323. TVectorIterator jt(del_preflush->file_ids);
  324. // the b-tree here is the same as preflush->btree, no need to allocate a new one
  325. if (OpStatus::IsError(err = preflush->btree->Delete(&jt)))
  326. return AbortPreFlush(err);
  327. if (preflush->btree->Empty())
  328. { // this can happen only after out of memory in MergeCaches
  329. if (OpStatus::IsError(err = m_act.DeleteCaseWord(preflush->word)))
  330. if (err != OpStatus::ERR_OUT_OF_RANGE)
  331. return AbortPreFlush(err);
  332. }
  333. FileWord* word = m_deleted_preflush.Remove(j);
  334. OP_DELETE(word);
  335. m_deleted_pos = m_deleted_preflush.GetCount();
  336. }
  337. if (max_ms > 0 && g_op_time_info->GetWallClockMS() >= time_limit)
  338. return OpBoolean::IS_FALSE;
  339. }
  340. // delete
  341. while (--m_deleted_pos >= 0)
  342. {
  343. del_preflush = m_deleted_preflush[m_deleted_pos];
  344. TVectorIterator jt(del_preflush->file_ids);
  345. OP_ASSERT(del_preflush->file_ids->GetCount() > 0);
  346. if (OpStatus::IsError(err = SearchForBTree(del_preflush, TRUE)))
  347. return AbortPreFlush(err);
  348. if (del_preflush->btree == NULL || del_preflush->btree->Empty())
  349. continue; // you are probably trying to delete something that hasn't been indexed
  350. if (OpStatus::IsError(err = del_preflush->btree->Delete(&jt)))
  351. return AbortPreFlush(err);
  352. if (del_preflush->btree->Empty())
  353. {
  354. if (OpStatus::IsError(err = m_act.DeleteCaseWord(del_preflush->word)))
  355. if (err != OpStatus::ERR_OUT_OF_RANGE)
  356. return AbortPreFlush(err);
  357. }
  358. if (max_ms > 0 && g_op_time_info->GetWallClockMS() >= time_limit)
  359. return OpBoolean::IS_FALSE;
  360. }
  361. switch (err = m_wordbag.Flush((m_flags & UseNUR) != 0 ? BSCache::ReleaseNo : BSCache::JournalAll, max_ms))
  362. {
  363. case OpBoolean::IS_TRUE:
  364. break;
  365. case OpBoolean::IS_FALSE:
  366. return OpBoolean::IS_FALSE;
  367. default:
  368. return AbortPreFlush(err);
  369. }
  370. for (i = m_word_preflush.GetCount() - 1; i >= 0; --i)
  371. {
  372. preflush = m_word_preflush[i];
  373. if (preflush->is_new_word)
  374. {
  375. OP_ASSERT(preflush->btree->GetId() > 0);
  376. if ((err = m_act.AddCaseWord(preflush->word, (ACT::WordID)preflush->btree->GetId(), FALSE)) != OpBoolean::IS_TRUE)
  377. {
  378. if (err == OpBoolean::IS_FALSE)
  379. err = OpStatus::ERR_PARSING_FAILED;
  380. if (err != OpStatus::OK)
  381. return AbortPreFlush(err);
  382. if (OpStatus::IsError(err = preflush->btree->Clear()))
  383. return AbortPreFlush(err);
  384. }
  385. preflush->is_new_word = FALSE;
  386. j = m_word_cache.BinarySearch(preflush->word);
  387. if (j < m_word_cache.GetCount())
  388. {
  389. cache_word = m_word_cache[j];
  390. if (cache_word->btree == NULL && act_strcmp(preflush->word, cache_word->word) == 0)
  391. {
  392. cache_word->btree = m_wordbag.GetTree(preflush->btree->GetId());
  393. cache_word->is_new_word = FALSE;
  394. }
  395. }
  396. }
  397. }
  398. if (max_ms > 0 && g_op_time_info->GetWallClockMS() >= time_limit)
  399. return OpBoolean::IS_FALSE;
  400. switch (err = m_act.Flush((m_flags & UseNUR) != 0 ? BSCache::ReleaseNo : BSCache::JournalAll, max_ms))
  401. {
  402. case OpBoolean::IS_TRUE:
  403. break;
  404. case OpBoolean::IS_FALSE:
  405. return OpBoolean::IS_FALSE;
  406. default:
  407. return AbortPreFlush(err);
  408. }
  409. m_flags |= PreFlushed;
  410. m_flags &= ~PreFlushing;
  411. return OpBoolean::IS_TRUE;
  412. }
  413. OP_STATUS StringTable::AbortPreFlush(OP_STATUS err)
  414. {
  415. int i;
  416. FileWord *preflush;
  417. OP_ASSERT(0); // this could normally happen only on out of memory or disk
  418. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  419. if (m_log != NULL)
  420. m_log->Write(SearchEngineLog::Warning, "AbortPreFlush", "%i", err);
  421. #endif
  422. m_act.Abort();
  423. OpStatus::Ignore(m_wordbag.Abort());
  424. for (i = m_word_preflush.GetCount() - 1; i >= 0; --i)
  425. {
  426. preflush = m_word_preflush[i];
  427. if (preflush->is_new_word)
  428. preflush->btree->Renew();
  429. }
  430. m_word_preflush.Clear();
  431. m_deleted_preflush.Clear();
  432. m_word_cache.MoveFrom(m_word_backup);
  433. m_deleted_cache.MoveFrom(m_deleted_backup);
  434. m_flags &= ~PreFlushing;
  435. return err;
  436. }
  437. OP_STATUS StringTable::Flush(int max_ms)
  438. {
  439. OP_STATUS err;
  440. OP_PROBE4(OP_PROBE_SEARCH_ENGINE_STRINGTABLE_FLUSH);
  441. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  442. if (m_log != NULL)
  443. m_log->Write(SearchEngineLog::Debug, "Flush", "%i", max_ms);
  444. #endif
  445. // ignore timeout if there is too many items in cache
  446. if (m_word_cache.GetCount() + m_deleted_cache.GetCount() > IGNORE_TIMEOUT_LIMIT)
  447. max_ms = 0;
  448. if ((m_flags & PreFlushed) == 0 || (m_flags & PreFlushing) != 0)
  449. {
  450. m_flags |= UseNUR;
  451. err = PreFlush();
  452. m_flags ^= UseNUR;
  453. RETURN_IF_ERROR(err);
  454. }
  455. if (m_wordbag.GetItemCount() > 0)
  456. {
  457. switch (err = m_wordbag.Flush(BSCache::ReleaseAll, max_ms))
  458. {
  459. case OpBoolean::IS_FALSE:
  460. return OpBoolean::IS_FALSE;
  461. case OpBoolean::IS_TRUE:
  462. break;
  463. default:
  464. return AbortFlush(err);
  465. }
  466. }
  467. switch (err = m_act.Flush(BSCache::ReleaseAll, max_ms))
  468. {
  469. case OpBoolean::IS_FALSE:
  470. return OpBoolean::IS_FALSE;
  471. case OpBoolean::IS_TRUE:
  472. break;
  473. default:
  474. return AbortFlush(err);
  475. }
  476. RETURN_IF_ERROR(m_act.SaveStatus());
  477. m_word_backup.Clear();
  478. m_deleted_backup.Clear();
  479. m_deleted_preflush.Clear();
  480. m_word_preflush.Clear();
  481. m_flags ^= PreFlushed;
  482. return OpBoolean::IS_TRUE;
  483. }
  484. OP_STATUS StringTable::AbortFlush(OP_STATUS err)
  485. {
  486. OP_ASSERT(0); // this could normally happen only on out of memory or disk
  487. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  488. if (m_log != NULL)
  489. m_log->Write(SearchEngineLog::Warning, "AbortFlush", "%i", err);
  490. #endif
  491. m_act.Abort();
  492. OpStatus::Ignore(m_wordbag.Abort());
  493. m_word_preflush.Clear();
  494. m_deleted_preflush.Clear();
  495. m_word_cache.MoveFrom(m_word_backup);
  496. m_deleted_cache.MoveFrom(m_deleted_backup);
  497. m_flags &= ~PreFlushed;
  498. return err;
  499. }
  500. OP_STATUS StringTable::Commit(void)
  501. {
  502. OP_PROBE4(OP_PROBE_SEARCH_ENGINE_STRINGTABLE_COMMIT);
  503. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  504. if (m_log != NULL)
  505. m_log->Write(SearchEngineLog::Debug, "Commit", "");
  506. #endif
  507. if ((!CacheEmpty() &&
  508. !m_wordbag.GetStorage()->InTransaction()) ||
  509. (m_word_preflush.GetCount() > 0 || m_deleted_preflush.GetCount() > 0))
  510. RETURN_IF_ERROR(Flush());
  511. RETURN_IF_ERROR(m_wordbag.Commit());
  512. return m_act.Commit();
  513. //#if defined(_DEBUG) || defined(SELFTEST)
  514. // OP_ASSERT(m_wordbag.CheckConsistency() == OpBoolean::IS_TRUE);
  515. //#endif
  516. }
  517. BOOL StringTable::CacheEmpty(void)
  518. {
  519. return m_word_cache.GetCount() == 0 && m_deleted_cache.GetCount() == 0;
  520. }
  521. OP_STATUS StringTable::InsertBlock(WordCache &cache, const uni_char *words, INT32 file_id)
  522. {
  523. WordSegmenter msgtok;
  524. OpString word;
  525. OP_BOOLEAN e;
  526. register const uni_char *chk;
  527. RETURN_IF_ERROR(msgtok.Set(words));
  528. RETURN_IF_ERROR((e = msgtok.GetNextToken(word)));
  529. while (e == OpBoolean::IS_TRUE)
  530. {
  531. for (chk = word.CStr(); *chk != 0; ++chk)
  532. if ((UINT16)*chk <= FIRST_CHAR)
  533. break;
  534. if (*chk != 0)
  535. {
  536. RETURN_IF_ERROR((e = msgtok.GetNextToken(word)));
  537. continue;
  538. }
  539. // highest 3 bits are flags
  540. if ((file_id & 0xE0000000) != 0)
  541. RETURN_IF_ERROR(Insert(cache, word.CStr(), file_id & 0x1FFFFFFF));
  542. RETURN_IF_ERROR(Insert(cache, word.CStr(), file_id));
  543. RETURN_IF_ERROR((e = msgtok.GetNextToken(word)));
  544. }
  545. return OpStatus::OK;
  546. }
  547. OP_STATUS StringTable::Insert(WordCache &cache, const uni_char *word, INT32 file_id)
  548. {
  549. int i;
  550. uni_char *up_word;
  551. OpString temp_upper;
  552. register FileWord *w;
  553. OP_STATUS err;
  554. WordCache *backup_cache;
  555. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  556. if (m_log != NULL)
  557. m_log->Write(SearchEngineLog::Debug, &cache == &m_word_cache ? "Insert" : "Delete", UNI_L("%.08X %s"), file_id, word);
  558. #endif
  559. m_flags &= ~(CachesSorted | CachesMerged);
  560. if ((m_flags & CaseSensitive) == 0)
  561. {
  562. RETURN_IF_ERROR(temp_upper.Set(word));
  563. temp_upper.MakeUpper();
  564. up_word = (uni_char *)temp_upper.CStr();
  565. }
  566. else up_word = (uni_char *)word;
  567. backup_cache = &cache == &m_word_cache ? &m_word_backup : &m_deleted_backup;
  568. i = cache.BinarySearch(up_word);
  569. if (i >= cache.GetCount() || act_strcmp(up_word, cache[i]->word) != 0)
  570. {
  571. RETURN_OOM_IF_NULL(w = FileWord::Create(up_word, file_id));
  572. if (OpStatus::IsError(err = cache.Insert(i, w)))
  573. {
  574. OP_DELETE(w);
  575. return err;
  576. }
  577. }
  578. else
  579. RETURN_IF_ERROR(cache[i]->Add(file_id));
  580. if ((m_flags & PreFlushing) != 0)
  581. {
  582. i = backup_cache->BinarySearch(up_word);
  583. if (i >= backup_cache->GetCount() || act_strcmp(up_word, (*backup_cache)[i]->word) != 0)
  584. {
  585. RETURN_OOM_IF_NULL(w = FileWord::Create(up_word, file_id));
  586. if (OpStatus::IsError(err = backup_cache->Insert(i, w)))
  587. {
  588. OP_DELETE(w);
  589. return err;
  590. }
  591. }
  592. else
  593. RETURN_IF_ERROR((*backup_cache)[i]->Add(file_id));
  594. }
  595. return OpStatus::OK;
  596. }
  597. OP_STATUS StringTable::Delete(const uni_char *word, const OpINT32Vector &file_ids)
  598. {
  599. OpINT32Vector found_file_ids;
  600. RETURN_IF_ERROR(Search(word, &found_file_ids));
  601. RETURN_IF_ERROR(found_file_ids.Intersect(file_ids));
  602. for (UINT32 i = 0; i < found_file_ids.GetCount(); i++)
  603. RETURN_IF_ERROR(Delete(word, found_file_ids.Get(i)));
  604. return OpStatus::OK;
  605. }
  606. OP_STATUS StringTable::Delete(const OpINT32Vector &file_ids)
  607. {
  608. // search for all words
  609. OpAutoPtr<SearchIterator<ACT::PrefixResult> > search_iterator(m_act.PrefixSearch(""));
  610. if (!search_iterator.get())
  611. return OpStatus::ERR_NO_MEMORY;
  612. if (!search_iterator->Empty())
  613. {
  614. OpString word16;
  615. do
  616. {
  617. RETURN_IF_ERROR(word16.Set(search_iterator->Get().utf8_word));
  618. RETURN_IF_ERROR(Delete(word16.CStr(), file_ids));
  619. }
  620. while (search_iterator->Next());
  621. }
  622. // Also words in the cache
  623. WordCache &search_cache = (m_flags & PreFlushing) == 0 ? m_word_cache : m_word_backup;
  624. // search_cache can shrink as we go, so make copy of the words
  625. OpVector<uni_char> tmp_copy(search_cache.GetCount());
  626. int i;
  627. for (i = 0; i < search_cache.GetCount(); i++)
  628. RETURN_IF_ERROR(tmp_copy.Add(UniSetNewStr(search_cache.Get(i)->word)));
  629. for (i = 0; i < (int)tmp_copy.GetCount(); i++)
  630. {
  631. RETURN_IF_ERROR(Delete(tmp_copy.Get(i), file_ids));
  632. OP_DELETEA(tmp_copy.Get(i));
  633. }
  634. return OpStatus::OK;
  635. }
  636. OP_STATUS StringTable::Delete(const uni_char *word)
  637. {
  638. TVector<INT32> ids;
  639. RETURN_IF_ERROR(Search(word, &ids));
  640. for (UINT32 i=0; i<ids.GetCount(); i++)
  641. RETURN_IF_ERROR(Delete(word, ids.Get(i)));
  642. return OpStatus::OK;
  643. }
  644. OP_STATUS StringTable::SearchForBTree(FileWord *fw, BOOL must_exist)
  645. {
  646. ACT::WordID btree_id;
  647. btree_id = m_act.CaseSearch(fw->word);
  648. if (btree_id == 0)
  649. {
  650. if (!must_exist)
  651. RETURN_OOM_IF_NULL(fw->btree = m_wordbag.CreateTree());
  652. }
  653. else {
  654. RETURN_OOM_IF_NULL(fw->btree = m_wordbag.GetTree(btree_id));
  655. }
  656. fw->is_new_word = btree_id == 0;
  657. return OpStatus::OK;
  658. }
  659. OP_STATUS StringTable::Search(const uni_char *word, TVector<INT32> *result, int prefix_search)
  660. {
  661. int i, j;
  662. uni_char *up_word;
  663. OpString temp_upper;
  664. ACT::WordID wbpos;
  665. ACT::WordID *act_result;
  666. OpAutoArray<ACT::WordID> prefix_anchor;
  667. int result_count;
  668. TBTree<INT32> *btree;
  669. SearchIterator<INT32> *it;
  670. WordCache *search_cache;
  671. FileWord *cache_word;
  672. OP_PROBE4(OP_PROBE_SEARCH_ENGINE_STRINGTABLE_SEARCH);
  673. if (word == NULL || *word == 0)
  674. return OpStatus::OK;
  675. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  676. if (m_log != NULL)
  677. m_log->Write(SearchEngineLog::Debug, "Search", UNI_L("%i %s"), prefix_search, word);
  678. #endif
  679. RETURN_IF_ERROR(SortCaches());
  680. MergeCaches();
  681. result->Clear();
  682. if ((m_flags & CaseSensitive) == 0)
  683. {
  684. RETURN_IF_ERROR(temp_upper.Set(word));
  685. temp_upper.MakeUpper();
  686. up_word = (uni_char *)temp_upper.CStr();
  687. }
  688. else up_word = (uni_char *)word;
  689. if (prefix_search > 0)
  690. {
  691. RETURN_OOM_IF_NULL(act_result = OP_NEWA(ACT::WordID, prefix_search));
  692. prefix_anchor.reset(act_result);
  693. result_count = m_act.PrefixCaseSearch(act_result, up_word, prefix_search);
  694. }
  695. else {
  696. act_result = &wbpos;
  697. wbpos = m_act.CaseSearch(up_word);
  698. result_count = wbpos == 0 ? 0 : 1;
  699. }
  700. for (i = 0; i < result_count; ++i)
  701. {
  702. RETURN_OOM_IF_NULL(btree = m_wordbag.GetTree(act_result[i]));
  703. if ((it = btree->SearchFirst()) == NULL)
  704. {
  705. OP_DELETE(btree);
  706. return OpStatus::ERR_NO_MEMORY;
  707. }
  708. j = 0;
  709. LoopDetector<INT32> loopDetector;
  710. if (it->Count() != 0)
  711. do {
  712. if (OpStatus::IsError(loopDetector.CheckNext(it->Get())) ||
  713. OpStatus::IsError(result->Add(it->Get())))
  714. {
  715. OP_DELETE(it);
  716. OP_DELETE(btree);
  717. return OpStatus::ERR_NO_MEMORY;
  718. }
  719. } while (it->Next());
  720. OP_DELETE(it);
  721. OP_DELETE(btree);
  722. if (prefix_search > 0 && (int)result->GetCount() > 2 * prefix_search)
  723. {
  724. RETURN_IF_ERROR(result->Sort());
  725. if ((int)result->GetCount() >= prefix_search)
  726. break;
  727. }
  728. }
  729. if (prefix_search)
  730. RETURN_IF_ERROR(result->Sort());
  731. search_cache = (m_flags & PreFlushing) == 0 ? &m_word_cache : &m_word_backup;
  732. if (search_cache->GetCount() > 0)
  733. {
  734. i = search_cache->BinarySearch(up_word);
  735. if (!prefix_search)
  736. {
  737. if (i < search_cache->GetCount())
  738. {
  739. cache_word = (*search_cache)[i];
  740. if (act_strcmp(cache_word->word, up_word) == 0)
  741. RETURN_IF_ERROR(result->Unite(*(cache_word->file_ids)));
  742. }
  743. }
  744. else {
  745. j = act_strlen(up_word);
  746. while (i < search_cache->GetCount())
  747. {
  748. cache_word = (*search_cache)[i];
  749. if (act_strncmp(cache_word->word, up_word, j) != 0)
  750. break;
  751. RETURN_IF_ERROR(result->Unite(*(cache_word->file_ids)));
  752. ++i;
  753. }
  754. }
  755. }
  756. search_cache = (m_flags & PreFlushing) == 0 ? &m_deleted_cache : &m_deleted_backup;
  757. if (search_cache->GetCount() > 0)
  758. {
  759. i = search_cache->BinarySearch(up_word);
  760. if (!prefix_search)
  761. {
  762. if (i < search_cache->GetCount())
  763. {
  764. cache_word = (*search_cache)[i];
  765. if (act_strcmp(cache_word->word, up_word) == 0)
  766. result->Differ(*(cache_word->file_ids));
  767. }
  768. }
  769. else {
  770. j = act_strlen(up_word);
  771. while (i < search_cache->GetCount())
  772. {
  773. cache_word = (*search_cache)[i];
  774. if (act_strncmp(cache_word->word, up_word, j) != 0)
  775. break;
  776. result->Differ(*(cache_word->file_ids));
  777. ++i;
  778. }
  779. }
  780. }
  781. if (prefix_search > 0 && (int)result->GetCount() > prefix_search)
  782. result->Remove(prefix_search);
  783. return OpStatus::OK;
  784. }
  785. OP_STATUS StringTable::WordSearch(const uni_char *word, uni_char **result, int *result_size)
  786. {
  787. if (result_size == NULL || *result_size <= 0)
  788. return OpStatus::OK;
  789. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_STRINGTABLE)
  790. if (m_log != NULL)
  791. m_log->Write(SearchEngineLog::Debug, "WordSearch", UNI_L("%i %s"), result_size, word);
  792. #endif
  793. if ((m_flags & CaseSensitive) == 0)
  794. *result_size = m_act.PrefixWords(result, word, *result_size);
  795. else *result_size = m_act.PrefixCaseWords(result, word, *result_size);
  796. return OpStatus::OK;
  797. }
  798. OP_STATUS StringTable::MultiSearch(const uni_char *words, TVector<INT32> *result, BOOL match_any, int prefix_search, int phrase_flags)
  799. {
  800. TVector<INT32> tmp_result;
  801. OpAutoPtr< TVector<uni_char *> > tokens;
  802. int i;
  803. if (words == NULL || *words == 0)
  804. return OpStatus::OK;
  805. {
  806. WordSegmenter msgtok;
  807. BOOL last_is_prefix;
  808. RETURN_IF_ERROR(msgtok.Set(words));
  809. tokens = msgtok.Parse(&last_is_prefix);
  810. RETURN_OOM_IF_NULL(tokens.get());
  811. if (!last_is_prefix)
  812. prefix_search = 0;
  813. }
  814. for (i = 0; i < (int)tokens->GetCount(); ++i)
  815. {
  816. if (i == 0 || (result->GetCount() == 0 && match_any))
  817. {
  818. RETURN_IF_ERROR(Search(tokens->Get(i), result, i == (int)tokens->GetCount() - 1 ? prefix_search * 16 : 0));
  819. }
  820. else {
  821. RETURN_IF_ERROR(Search(tokens->Get(i), &tmp_result, i == (int)tokens->GetCount() - 1 ? prefix_search * 16 : 0));
  822. if (match_any)
  823. RETURN_IF_ERROR(result->Unite(tmp_result));
  824. else {
  825. RETURN_IF_ERROR(result->Intersect(tmp_result));
  826. if (result->GetCount() == 0)
  827. break; // doesn't make sense to search further
  828. }
  829. }
  830. }
  831. tokens.reset();
  832. #ifdef SEARCH_ENGINE_PHRASESEARCH
  833. if ((phrase_flags & PhraseMatcher::AllPhrases) != 0 && m_document_source != NULL && result->GetCount() != 0 &&
  834. (m_phrase_search_cutoff == 0 || result->GetCount() <= m_phrase_search_cutoff))
  835. {
  836. if (prefix_search)
  837. phrase_flags |= PhraseMatcher::PrefixSearch;
  838. PhraseFilter<INT32> filter(words, *m_document_source, phrase_flags);
  839. if (!filter.Empty())
  840. result->Filter(filter);
  841. }
  842. #endif
  843. // if (prefix_search > 0 && (int)result->GetCount() > prefix_search)
  844. // result->Remove(prefix_search, result->GetCount() - prefix_search);
  845. return OpStatus::OK;
  846. }
  847. SearchIterator<INT32> *StringTable::PhraseSearch(const uni_char *words, int prefix_search, int phrase_flags)
  848. {
  849. TVectorIterator *it;
  850. SearchIterator<INT32> *result;
  851. TVector<INT32> *unfiltered_result;
  852. unfiltered_result = OP_NEW(TVector<INT32>, ());
  853. if (unfiltered_result == NULL)
  854. return NULL;
  855. if (OpStatus::IsError(MultiSearch(words, unfiltered_result, FALSE, prefix_search, 0/*no phrases yet*/)) ||
  856. (it = OP_NEW(TVectorIterator, (unfiltered_result, TRUE))) == NULL)
  857. {
  858. OP_DELETE(unfiltered_result);
  859. return NULL;
  860. }
  861. #ifdef SEARCH_ENGINE_PHRASESEARCH
  862. if ((phrase_flags & PhraseMatcher::AllPhrases) != 0 && m_document_source != NULL && unfiltered_result->GetCount() != 0 &&
  863. (m_phrase_search_cutoff == 0 || unfiltered_result->GetCount() <= m_phrase_search_cutoff))
  864. {
  865. PhraseFilter<INT32>* filter = OP_NEW(PhraseFilter<INT32>, (words, *m_document_source, phrase_flags));
  866. if (prefix_search)
  867. phrase_flags |= PhraseMatcher::PrefixSearch;
  868. result = OP_NEW(FilterIterator<INT32>, (it, filter));
  869. if (result == NULL)
  870. OP_DELETE(it);
  871. }
  872. else
  873. result = it;
  874. #else
  875. result = it;
  876. #endif
  877. return result;
  878. }
  #ifdef ESTIMATE_MEMORY_USED_AVAILABLE
  /**
   * Estimates the memory used by the table.
   * @return sum of the estimates of the index, the word bag and all six word
   *         caches plus the size of the position and flag members
   */
  size_t StringTable::EstimateMemoryUsed() const
  {
      size_t total = 0;

      total += m_act.EstimateMemoryUsed();
      total += m_wordbag.EstimateMemoryUsed();

      total += m_word_cache.EstimateMemoryUsed();
      total += m_deleted_cache.EstimateMemoryUsed();
      total += m_word_preflush.EstimateMemoryUsed();
      total += m_deleted_preflush.EstimateMemoryUsed();
      total += m_word_backup.EstimateMemoryUsed();
      total += m_deleted_backup.EstimateMemoryUsed();

      total += sizeof(m_word_pos);
      total += sizeof(m_deleted_pos);
      total += sizeof(m_flags);

      return total;
  }
  #endif
  896. StringTable::FileWord *StringTable::FileWord::Create(const uni_char *word, INT32 file_id)
  897. {
  898. FileWord *fw;
  899. if ((fw = OP_NEW(FileWord, ())) == NULL)
  900. return NULL;
  901. if ((fw->word = uni_strdup(word)) == NULL)
  902. {
  903. OP_DELETE(fw);
  904. return NULL;
  905. }
  906. if ((fw->file_ids = OP_NEW(TVector<INT32>, ())) == NULL)
  907. {
  908. OP_DELETE(fw);
  909. return NULL;
  910. }
  911. if (OpStatus::IsError(fw->file_ids->Add(file_id)))
  912. {
  913. OP_DELETE(fw);
  914. return NULL;
  915. }
  916. return fw;
  917. }
  #ifdef ESTIMATE_MEMORY_USED_AVAILABLE
  /**
   * Estimates the memory used by this record.
   * Each present sub-object (word, file_ids, btree) is counted with its own
   * size plus 2*sizeof(size_t); the sizes of the members themselves are added
   * on top.
   * @return estimated number of bytes
   */
  size_t StringTable::FileWord::EstimateMemoryUsed() const
  {
      size_t sum = 0;

      // the word is a uni_char string: count bytes, including the terminator,
      // not just the number of characters
      if (word)
          sum += (uni_strlen(word) + 1) * sizeof(uni_char) + 2*sizeof(size_t);
      if (file_ids)
          sum += file_ids->EstimateMemoryUsed() + 2*sizeof(size_t);
      if (btree)
          sum += btree->EstimateMemoryUsed() + 2*sizeof(size_t);

      return sum +
          sizeof(word) +
          sizeof(btree) +
          sizeof(file_ids) +
          sizeof(is_new_word);
  }
  #endif
  935. int StringTable::WordCache::BinarySearch(const uni_char *key)
  936. {
  937. int bstart, bend, n2;
  938. bstart = 0;
  939. bend = GetCount();
  940. while (bend > bstart)
  941. {
  942. n2 = (bend - bstart) / 2;
  943. if (act_strcmp(Get(bstart + n2)->word, key) < 0)
  944. bstart = bstart + n2 + 1;
  945. else
  946. bend = bstart + n2;
  947. }
  948. return bstart;
  949. }
  950. OP_STATUS StringTable::SortCaches(void)
  951. {
  952. int i;
  953. if ((m_flags & CachesSorted) != 0)
  954. return OpStatus::OK;
  955. for (i = m_word_cache.GetCount() - 1; i >= 0; --i)
  956. RETURN_IF_ERROR(m_word_cache[i]->file_ids->Sort());
  957. for (i = m_deleted_cache.GetCount() - 1; i >= 0; --i)
  958. RETURN_IF_ERROR(m_deleted_cache[i]->file_ids->Sort());
  959. m_flags |= CachesSorted;
  960. return OpStatus::OK;
  961. }
  962. void StringTable::MergeCaches(void)
  963. {
  964. int i, j;
  965. int k;
  966. unsigned int l;
  967. TVector<INT32> tmp_vec;
  968. FileWord *cache_word, *del_cache;
  969. if ((m_flags & CachesMerged) != 0)
  970. return;
  971. for (i = m_word_cache.GetCount() - 1; i >= 0; --i)
  972. {
  973. cache_word = m_word_cache[i];
  974. j = m_deleted_cache.BinarySearch(cache_word->word);
  975. if (j >= m_deleted_cache.GetCount())
  976. continue;
  977. del_cache = m_deleted_cache[j];
  978. if (act_strcmp(del_cache->word, cache_word->word) != 0)
  979. continue;
  980. if (OpStatus::IsSuccess(VectorBase::Intersect(tmp_vec, *(cache_word->file_ids), *(del_cache->file_ids))))
  981. {
  982. cache_word->file_ids->Differ(tmp_vec);
  983. del_cache->file_ids->Differ(tmp_vec);
  984. tmp_vec.Clear();
  985. }
  986. else { // on ERR_NO_MEMORY
  987. for (k = cache_word->file_ids->GetCount() - 1; k >= 0; --k)
  988. {
  989. l = del_cache->file_ids->Search(cache_word->file_ids->Get(k));
  990. if (l < del_cache->file_ids->GetCount() &&
  991. cache_word->file_ids->Get(k) == del_cache->file_ids->Get(l))
  992. {
  993. cache_word->file_ids->Remove(k);
  994. del_cache->file_ids->Remove(l);
  995. }
  996. }
  997. }
  998. if (del_cache->file_ids->GetCount() == 0)
  999. {
  1000. FileWord* word = m_deleted_cache.Remove(j);
  1001. OP_DELETE(word);
  1002. }
  1003. if (cache_word->file_ids->GetCount() == 0)
  1004. {
  1005. FileWord* word = m_word_cache.Remove(i);
  1006. OP_DELETE(word);
  1007. }
  1008. }
  1009. m_flags |= CachesMerged;
  1010. }
  1011. OP_BOOLEAN StringTable::CheckConsistency(BOOL thorough)
  1012. {
  1013. OP_BOOLEAN a, b;
  1014. RETURN_IF_ERROR((a = m_act.CheckConsistency()));
  1015. RETURN_IF_ERROR((b = m_wordbag.CheckConsistency(0, thorough)));
  1016. if (a == OpBoolean::IS_FALSE || b == OpBoolean::IS_FALSE)
  1017. return OpBoolean::IS_FALSE;
  1018. if (!m_wordbag.GetStorage()->IsStartBlocksSupported())
  1019. return OpBoolean::IS_TRUE;
  1020. // search for all words
  1021. OpAutoPtr<SearchIterator<ACT::PrefixResult> > search_iterator(m_act.PrefixSearch(""));
  1022. if (!search_iterator.get())
  1023. return OpStatus::ERR_NO_MEMORY;
  1024. // Make sure all words have a btree
  1025. if (!search_iterator->Empty())
  1026. do
  1027. if (!m_wordbag.GetStorage()->IsStartBlock(search_iterator->Get().id * m_wordbag.GetStorage()->GetBlockSize()))
  1028. return OpBoolean::IS_FALSE;
  1029. while (search_iterator->Next());
  1030. return OpBoolean::IS_TRUE;
  1031. }
  1032. OP_STATUS StringTable::Recover(const uni_char* path, const uni_char* tablename)
  1033. {
  1034. StringTable indexer_table, temp_indexer_table;
  1035. OpString temp_tablename;
  1036. // make a temporary table where we will insert all values that we can find
  1037. RETURN_IF_ERROR(temp_tablename.AppendFormat(UNI_L("%s.temp"),tablename));
  1038. // if the temporary table was already created, we should empty it
  1039. if (temp_indexer_table.Open(path,temp_tablename.CStr()) != OpBoolean::IS_FALSE)
  1040. RETURN_IF_ERROR(temp_indexer_table.Clear());
  1041. // Open the string table
  1042. if (Open(path, tablename) == OpBoolean::IS_TRUE)
  1043. {
  1044. // search for all words
  1045. OpAutoPtr<SearchIterator<ACT::PrefixResult> > search_iterator(m_act.PrefixSearch(""));
  1046. if (!search_iterator.get())
  1047. return OpStatus::ERR_NO_MEMORY;
  1048. if (!search_iterator->Empty())
  1049. {
  1050. do
  1051. {
  1052. OpString word16;
  1053. RETURN_IF_ERROR(word16.Set(search_iterator->Get().utf8_word));
  1054. OpINT32Vector temp_result;
  1055. // Search for all file ids belonging to that word
  1056. RETURN_IF_ERROR(Search(word16.CStr(), &temp_result));
  1057. // Insert all file ids with that word
  1058. for (UINT32 i= 0; i < temp_result.GetCount(); i++)
  1059. {
  1060. RETURN_IF_ERROR(temp_indexer_table.Insert(word16.CStr(),temp_result.Get(i)));
  1061. }
  1062. }
  1063. while (search_iterator->Next());
  1064. }
  1065. // open iterators need to be closed before closing the tree
  1066. search_iterator.reset();
  1067. // recovery is finished, kill the old file and replace with the new (temp) one
  1068. RETURN_IF_ERROR(Close());
  1069. RETURN_IF_ERROR(temp_indexer_table.Close());
  1070. OpString act_filename, lex_filename, temp_act_filename, temp_lex_filename;
  1071. RETURN_IF_ERROR(act_filename.AppendFormat(UNI_L("%s%c%s%s"), path, PATHSEPCHAR, tablename, UNI_L(ACT_EXTENSION)));
  1072. RETURN_IF_ERROR(lex_filename.AppendFormat(UNI_L("%s%c%s%s"), path, PATHSEPCHAR, tablename, UNI_L(LEX_EXTENSION)));
  1073. RETURN_IF_ERROR(temp_act_filename.AppendFormat(UNI_L("%s%c%s%s"), path, PATHSEPCHAR, temp_tablename.CStr(), UNI_L(ACT_EXTENSION)));
  1074. RETURN_IF_ERROR(temp_lex_filename.AppendFormat(UNI_L("%s%c%s%s"), path, PATHSEPCHAR, temp_tablename.CStr(), UNI_L(LEX_EXTENSION)));
  1075. RETURN_IF_ERROR(BlockStorage::DeleteFile(act_filename.CStr()));
  1076. RETURN_IF_ERROR(BlockStorage::DeleteFile(lex_filename.CStr()));
  1077. RETURN_IF_ERROR(BlockStorage::RenameFile(temp_act_filename.CStr(), act_filename.CStr()));
  1078. RETURN_IF_ERROR(BlockStorage::RenameFile(temp_lex_filename.CStr(), lex_filename.CStr()));
  1079. }
  1080. return OpStatus::OK;
  1081. }
  #ifdef ESTIMATE_MEMORY_USED_AVAILABLE
  /**
   * Estimates the memory used by the cache.
   * @return estimates of all stored FileWord records, plus the pointer array
   *         (m_size pointers and 2*sizeof(size_t)), plus the sizes of the
   *         vector's bookkeeping members
   */
  size_t StringTable::WordCache::EstimateMemoryUsed() const
  {
      // pointer array
      size_t total = m_size*sizeof(void*) + 2*sizeof(size_t);

      // bookkeeping members
      total += sizeof(m_size);
      total += sizeof(m_items);
      total += sizeof(m_count);
      total += sizeof(m_step);
      total += sizeof(m_min_step);

      // stored records
      for (unsigned n = 0; n < m_count; ++n)
      {
          const FileWord *entry = (FileWord *)(m_items[n]);
          total += entry->EstimateMemoryUsed();
      }

      return total;
  }
  #endif
  1097. void StringTable::WordCache::Clear()
  1098. {
  1099. unsigned i;
  1100. for (i = 0; i < m_count; ++i)
  1101. OP_DELETE((FileWord *)(m_items[i]));
  1102. OpGenericVector::Clear();
  1103. }
  1104. void StringTable::WordCache::MoveFrom(WordCache &src)
  1105. {
  1106. Clear();
  1107. m_size = src.m_size;
  1108. m_items = src.m_items;
  1109. m_count = src.m_count;
  1110. m_step = src.m_step;
  1111. src.m_items = NULL;
  1112. src.m_count = 0;
  1113. src.m_size = 0;
  1114. }
  1115. OP_STATUS StringTable::WordCache::CopyFrom(WordCache &src)
  1116. {
  1117. unsigned i;
  1118. FileWord **new_items = OP_NEWA(FileWord*, src.m_count);
  1119. FileWord *dst_item, *src_item;
  1120. RETURN_OOM_IF_NULL(new_items);
  1121. for (i = 0; i < src.m_count; ++i)
  1122. {
  1123. src_item = (FileWord *)src.m_items[i];
  1124. if ((dst_item = new_items[i] = OP_NEW(FileWord, ())) == NULL
  1125. || (dst_item->word = uni_strdup(src_item->word)) == NULL
  1126. /*|| (dst_item->btree = new TBTree<INT32>(*src_item->btree)) == NULL*/
  1127. || (dst_item->file_ids = OP_NEW(TVector<INT32>, ())) == NULL
  1128. || OpStatus::IsError(dst_item->file_ids->DuplicateOf(*src_item->file_ids)))
  1129. {
  1130. do {
  1131. OP_DELETE(new_items[i]);
  1132. } while (i-- > 0);
  1133. OP_DELETEA(new_items);
  1134. return OpStatus::ERR_NO_MEMORY;
  1135. }
  1136. }
  1137. Clear();
  1138. m_items = (void **)new_items;
  1139. m_count = src.m_count;
  1140. m_size = m_count;
  1141. m_step = src.m_step;
  1142. return OpStatus::OK;
  1143. }
  1144. #endif // defined SEARCH_ENGINE && (defined SEARCH_ENGINE_FOR_MAIL || defined SELFTEST)