Opera 12.15 Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

RankIndex.cpp 8.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #include "core/pch.h"
  9. #ifdef VISITED_PAGES_SEARCH
  10. #include "modules/search_engine/RankIndex.h"
  11. #include "modules/pi/system/OpLowLevelFile.h"
  12. #define FNAME_ACT_OLD "w.ax"
  13. #define FNAME_URL_OLD "url.ax"
  14. OP_STATUS RankIndex::Open(const uni_char *path, unsigned short id)
  15. {
  16. OpString fname, old_fname;
  17. int path_len;
  18. OP_STATUS err;
  19. IdTime last_doc;
  20. RETURN_OOM_IF_NULL(fname.Reserve((int)uni_strlen(path) + 16));
  21. RETURN_IF_ERROR(fname.Set(path));
  22. RETURN_IF_ERROR(fname.AppendFormat(UNI_L("%c%.04i%c"), PATHSEPCHAR, id, PATHSEPCHAR));
  23. path_len = fname.Length();
  24. RETURN_IF_ERROR(old_fname.Set(fname));
  25. RETURN_IF_ERROR(old_fname.Append(UNI_L(FNAME_ACT_OLD)));
  26. RETURN_IF_ERROR(fname.Append(UNI_L(FNAME_ACT)));
  27. if (BlockStorage::FileExists(fname.CStr()) == OpBoolean::IS_FALSE &&
  28. BlockStorage::FileExists(old_fname.CStr()) == OpBoolean::IS_TRUE)
  29. RETURN_IF_ERROR(BlockStorage::RenameStorage(old_fname.CStr(), fname.CStr()));
  30. RETURN_IF_ERROR(m_act.Open(fname.CStr(), BlockStorage::OpenReadWrite));
  31. // What if one file is missing and not the others?
  32. fname.Delete(path_len);
  33. RETURN_IF_ERROR(fname.Append(UNI_L(FNAME_WB)));
  34. if (OpStatus::IsError(err = m_wordbag.Open(fname.CStr(), BlockStorage::OpenReadWrite, 512)))
  35. {
  36. OpStatus::Ignore(m_act.Close());
  37. return err;
  38. }
  39. fname.Delete(path_len);
  40. RETURN_IF_ERROR(fname.Append(UNI_L(FNAME_META)));
  41. if (OpStatus::IsError(err = m_metadata.Open(fname.CStr(), BlockStorage::OpenReadWrite, 8192)))
  42. {
  43. m_wordbag.Close();
  44. OpStatus::Ignore(m_act.Close());
  45. return err;
  46. }
  47. fname.Delete(path_len);
  48. RETURN_IF_ERROR(fname.Append(UNI_L(FNAME_BTREE)));
  49. if (OpStatus::IsError(err = m_alldoc.Open(fname.CStr(), BlockStorage::OpenReadWrite, 1024)))
  50. {
  51. m_metadata.Close();
  52. m_wordbag.Close();
  53. OpStatus::Ignore(m_act.Close());
  54. return err;
  55. }
  56. fname.Delete(path_len);
  57. RETURN_IF_ERROR(fname.Append(UNI_L(FNAME_URL)));
  58. old_fname.Delete(path_len);
  59. RETURN_IF_ERROR(old_fname.Append(UNI_L(FNAME_URL_OLD)));
  60. if (BlockStorage::FileExists(fname.CStr()) == OpBoolean::IS_FALSE &&
  61. BlockStorage::FileExists(old_fname.CStr()) == OpBoolean::IS_TRUE)
  62. RETURN_IF_ERROR(BlockStorage::RenameStorage(old_fname.CStr(), fname.CStr()));
  63. if (OpStatus::IsError(err = m_url.Open(fname.CStr(), BlockStorage::OpenReadWrite, &RankIndex::GetTail, this)))
  64. {
  65. OpStatus::Ignore(m_alldoc.Close());
  66. m_metadata.Close();
  67. m_wordbag.Close();
  68. OpStatus::Ignore(m_act.Close());
  69. return err;
  70. }
  71. if (m_alldoc.GetFirst(last_doc) != OpBoolean::IS_TRUE)
  72. m_doc_count = 0;
  73. else
  74. m_doc_count = last_doc.data[IDTIME_ID];
  75. m_id = id;
  76. return OpStatus::OK;
  77. }
  78. void RankIndex::Close(void)
  79. {
  80. if (m_metadata.InTransaction() || m_wordbag.InTransaction() ||
  81. m_alldoc.GetStorage()->InTransaction() || m_act.GetStorage()->InTransaction())
  82. {
  83. OpStatus::Ignore(m_alldoc.Flush());
  84. OpStatus::Ignore(m_act.Flush());
  85. OpStatus::Ignore(m_url.Flush());
  86. OpStatus::Ignore(m_alldoc.Commit());
  87. OpStatus::Ignore(m_metadata.Commit());
  88. OpStatus::Ignore(m_wordbag.Commit());
  89. OpStatus::Ignore(m_act.Commit());
  90. OpStatus::Ignore(m_url.Commit());
  91. }
  92. OpStatus::Ignore(m_alldoc.Close());
  93. m_metadata.Close();
  94. m_wordbag.Close();
  95. OpStatus::Ignore(m_act.Close());
  96. OpStatus::Ignore(m_url.Close());
  97. }
  98. OpFileLength RankIndex::Size()
  99. {
  100. return m_act.GetStorage()->GetFileSize() +
  101. m_wordbag.GetFileSize() +
  102. m_metadata.GetFileSize() +
  103. m_alldoc.GetStorage()->GetFileSize() +
  104. m_url.GetStorage()->GetFileSize();
  105. }
  106. time_t RankIndex::ModifTime(void)
  107. {
  108. OpFileInfo finfo;
  109. OpString dirname;
  110. OpLowLevelFile *dir;
  111. if (m_alldoc.GetStorage() == NULL || m_alldoc.GetStorage()->GetFullName() == NULL)
  112. return (time_t)-1;
  113. RETURN_VALUE_IF_ERROR(dirname.Set(m_alldoc.GetStorage()->GetFullName()), (time_t)-1);
  114. dirname.Delete(dirname.Length() - 1 - (int)op_strlen(FNAME_BTREE));
  115. RETURN_VALUE_IF_ERROR(OpLowLevelFile::Create(&dir, dirname.CStr()), (time_t)-1);
  116. finfo.flags = OpFileInfo::LAST_MODIFIED;
  117. if (OpStatus::IsError(dir->GetFileInfo(&finfo)))
  118. {
  119. OP_DELETE(dir);
  120. return (time_t)-1;
  121. }
  122. OP_DELETE(dir);
  123. return finfo.last_modified;
  124. }
  125. OP_STATUS RankIndex::Clear(void)
  126. {
  127. OpString alldoc_path, metadata_path, act_path, wordbag_path, url_path, directory;
  128. RETURN_IF_ERROR(alldoc_path.Set(m_alldoc.GetStorage()->GetFullName()));
  129. RETURN_IF_ERROR(metadata_path.Set(m_metadata.GetFullName()));
  130. RETURN_IF_ERROR(act_path.Set(m_act.GetStorage()->GetFullName()));
  131. RETURN_IF_ERROR(wordbag_path.Set(m_wordbag.GetFullName()));
  132. RETURN_IF_ERROR(url_path.Set(m_url.GetStorage()->GetFullName()));
  133. RETURN_IF_ERROR(directory.Set(m_alldoc.GetStorage()->GetFullName()));
  134. directory.Delete(directory.Length() - 1 - (int)op_strlen(FNAME_BTREE));
  135. OpStatus::Ignore(Rollback());
  136. Close();
  137. OpStatus::Ignore(BlockStorage::DeleteFile(alldoc_path.CStr()));
  138. OpStatus::Ignore(BlockStorage::DeleteFile(metadata_path.CStr()));
  139. OpStatus::Ignore(BlockStorage::DeleteFile(act_path.CStr()));
  140. OpStatus::Ignore(BlockStorage::DeleteFile(wordbag_path.CStr()));
  141. OpStatus::Ignore(BlockStorage::DeleteFile(url_path.CStr()));
  142. OpStatus::Ignore(BlockStorage::DeleteFile(directory.CStr()));
  143. return OpStatus::OK;
  144. }
  145. OP_STATUS RankIndex::Rollback(void)
  146. {
  147. RETURN_IF_ERROR(m_alldoc.Abort());
  148. RETURN_IF_ERROR(m_metadata.Rollback());
  149. RETURN_IF_ERROR(m_wordbag.Rollback());
  150. m_act.Abort();
  151. m_url.Abort();
  152. return OpStatus::OK;
  153. }
  154. OP_STATUS RankIndex::SetupCursor(BSCursor &cursor)
  155. {
  156. RETURN_IF_ERROR(cursor.AddField("hash", 2)); // quick comparison of the documents
  157. RETURN_IF_ERROR(cursor.AddField("visited", 4)); // time of visiting the page
  158. RETURN_IF_ERROR(cursor.AddField("invalid", 1)); // do not include this one in results
  159. RETURN_IF_ERROR(cursor.AddField("prev_idx", 2)); // older index with the same URL
  160. RETURN_IF_ERROR(cursor.AddField("prev", 4)); // older record with the same URL
  161. RETURN_IF_ERROR(cursor.AddField("next_idx", 2)); // newer index with the same URL
  162. RETURN_IF_ERROR(cursor.AddField("next", 4)); // newer record with the same URL
  163. RETURN_IF_ERROR(cursor.AddField("url", 0));
  164. RETURN_IF_ERROR(cursor.AddField("title", 0));
  165. RETURN_IF_ERROR(cursor.AddField("filename", 0));
  166. RETURN_IF_ERROR(cursor.AddField("thumbnail", 0));
  167. return cursor.AddField("plaintext", 0);
  168. }
  169. OP_STATUS RankIndex::GetTail(char **stored_value, ACT::WordID id, void *usr_val)
  170. {
  171. BSCursor cursor(&(((RankIndex *)usr_val)->m_metadata));
  172. RETURN_IF_ERROR(RankIndex::SetupCursor(cursor));
  173. RETURN_IF_ERROR(cursor.Goto(id));
  174. int size = cursor["url"].GetSize() + 1;
  175. RETURN_OOM_IF_NULL(*stored_value = OP_NEWA(char, size));
  176. cursor["url"].GetStringValue(*stored_value);
  177. return OpStatus::OK;
  178. }
  179. #if defined SEARCH_ENGINE_LOG && (SEARCH_ENGINE_LOG & SEARCH_ENGINE_LOG_VISITEDSEARCH)
  180. OP_STATUS RankIndex::LogFile(OutputLogDevice *log, const uni_char *path, unsigned short id, const uni_char *fname, const uni_char *suffix)
  181. {
  182. OpString fullname;
  183. OpString8 tag, s8;
  184. OP_BOOLEAN e;
  185. RETURN_IF_ERROR(fullname.AppendFormat(UNI_L("%s%c%.04i%c%s"), path, PATHSEPCHAR, id, PATHSEPCHAR, fname));
  186. RETURN_IF_ERROR(s8.Set(fname));
  187. RETURN_IF_ERROR(tag.AppendFormat("%04i.%s", id, s8.CStr()));
  188. if (suffix != NULL)
  189. {
  190. RETURN_IF_ERROR(s8.Set(suffix));
  191. RETURN_IF_ERROR(fullname.Append(suffix));
  192. RETURN_IF_ERROR(tag.Append(s8));
  193. }
  194. RETURN_IF_ERROR(e = BlockStorage::FileExists(fullname));
  195. if (e != OpBoolean::IS_TRUE)
  196. return OpStatus::OK;
  197. log->WriteFile(SearchEngineLog::Debug, tag, fullname);
  198. return OpStatus::OK;
  199. }
  200. OP_STATUS RankIndex::LogSubDir(OutputLogDevice *log, const uni_char *path, unsigned short id)
  201. {
  202. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_ACT)));
  203. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_ACT), UNI_L("-j")));
  204. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_ACT), UNI_L("-g")));
  205. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_WB)));
  206. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_WB), UNI_L("-j")));
  207. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_WB), UNI_L("-g")));
  208. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_META)));
  209. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_META), UNI_L("-j")));
  210. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_META), UNI_L("-g")));
  211. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_BTREE)));
  212. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_BTREE), UNI_L("-j")));
  213. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_BTREE), UNI_L("-g")));
  214. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_URL)));
  215. RETURN_IF_ERROR(RankIndex::LogFile(log, path, id, UNI_L(FNAME_URL), UNI_L("-j")));
  216. return RankIndex::LogFile(log, path, id, UNI_L(FNAME_URL), UNI_L("-g"));
  217. }
  218. #endif // SEARCH_ENGINE_LOG
  219. #endif // VISITED_PAGES_SEARCH