Opera 12.15 Source Code
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #include "core/pch.h"
  9. #ifdef SEARCH_ENGINE // to remove compilation errors with ADVANCED_OPVECTOR
  10. #include "modules/search_engine/ACT.h"
  11. // this file contains case-insensitive variants of ACT methods
  12. static char *uni_strupr8c(const uni_char *word)
  13. {
  14. uni_char *upper_word;
  15. char *utf8_word;
  16. int len;
  17. len = (int)uni_strlen(word);
  18. if ((upper_word = OP_NEWA(uni_char, len + 1)) == NULL)
  19. return NULL;
  20. uni_strcpy(upper_word, word);
  21. uni_strupr(upper_word);
  22. if ((utf8_word = OP_NEWA(char, len * 4 + 1)) == NULL)
  23. {
  24. OP_DELETEA(upper_word);
  25. return NULL;
  26. }
  27. to_utf8(utf8_word, upper_word, len * 4 + 1);
  28. OP_DELETEA(upper_word);
  29. return utf8_word;
  30. }
  31. static char *uni_str8c(const uni_char *word)
  32. {
  33. char *utf8_word;
  34. int len;
  35. len = (int)uni_strlen(word);
  36. if ((utf8_word = OP_NEWA(char, len * 4 + 1)) == NULL)
  37. {
  38. OP_DELETEA(utf8_word);
  39. return NULL;
  40. }
  41. to_utf8(utf8_word, word, len * 4 + 1);
  42. return utf8_word;
  43. }
  44. static char *strupr8c(const char *utf8_word)
  45. {
  46. uni_char *upper_word;
  47. char *up8_word;
  48. int len;
  49. len = (int)op_strlen(utf8_word);
  50. if ((upper_word = OP_NEWA(uni_char, len + 1)) == NULL)
  51. return NULL;
  52. len = from_utf8(upper_word, utf8_word, (len + 1) * sizeof(uni_char));
  53. uni_strupr(upper_word);
  54. if ((up8_word = OP_NEWA(char, len * 2 + 1)) == NULL)
  55. {
  56. OP_DELETEA(upper_word);
  57. return NULL;
  58. }
  59. to_utf8(up8_word, upper_word, len * 2 + 1);
  60. OP_DELETEA(upper_word);
  61. return up8_word;
  62. }
  63. static char *strlwr8(char *utf8_word)
  64. {
  65. uni_char *lower_word;
  66. int len;
  67. len = (int)op_strlen(utf8_word) + 1;
  68. if ((lower_word = OP_NEWA(uni_char, len)) == NULL)
  69. return NULL;
  70. from_utf8(lower_word, utf8_word, len * sizeof(uni_char));
  71. uni_strlwr(lower_word);
  72. to_utf8(utf8_word, lower_word, len);
  73. OP_DELETEA(lower_word);
  74. return utf8_word;
  75. }
  76. OP_STATUS ACT::AddWord(const uni_char *word, WordID id, BOOL overwrite_existing)
  77. {
  78. char *utf8_word;
  79. OP_STATUS err;
  80. RETURN_OOM_IF_NULL(utf8_word = uni_strupr8c(word));
  81. err = AddCaseWord(utf8_word, id, overwrite_existing);
  82. OP_DELETEA(utf8_word);
  83. return err;
  84. }
  85. OP_STATUS ACT::AddWord(const char *utf8_word, WordID id, BOOL overwrite_existing)
  86. {
  87. char *up8_word;
  88. OP_STATUS err;
  89. RETURN_OOM_IF_NULL(up8_word = strupr8c(utf8_word));
  90. err = AddCaseWord(up8_word, id, overwrite_existing);
  91. OP_DELETEA(up8_word);
  92. return err;
  93. }
  94. OP_STATUS ACT::AddCaseWord(const uni_char *word, WordID id, BOOL overwrite_existing)
  95. {
  96. char *utf8_word;
  97. OP_STATUS err;
  98. RETURN_OOM_IF_NULL(utf8_word = uni_str8c(word));
  99. err = AddCaseWord(utf8_word, id, overwrite_existing);
  100. OP_DELETEA(utf8_word);
  101. return err;
  102. }
  103. OP_STATUS ACT::DeleteWord(const uni_char *word)
  104. {
  105. char *utf8_word;
  106. OP_STATUS err;
  107. RETURN_OOM_IF_NULL(utf8_word = uni_strupr8c(word));
  108. err = DeleteCaseWord(utf8_word);
  109. OP_DELETEA(utf8_word);
  110. return err;
  111. }
  112. OP_STATUS ACT::DeleteWord(const char *utf8_word)
  113. {
  114. char *up8_word;
  115. OP_STATUS err;
  116. RETURN_OOM_IF_NULL(up8_word = strupr8c(utf8_word));
  117. err = DeleteCaseWord(up8_word);
  118. OP_DELETEA(up8_word);
  119. return err;
  120. }
  121. OP_STATUS ACT::DeleteCaseWord(const uni_char *word)
  122. {
  123. char *utf8_word;
  124. OP_STATUS err;
  125. RETURN_OOM_IF_NULL(utf8_word = uni_str8c(word));
  126. err = DeleteCaseWord(utf8_word);
  127. OP_DELETEA(utf8_word);
  128. return err;
  129. }
  130. ACT::WordID ACT::Search(const uni_char *word)
  131. {
  132. char *utf8_word;
  133. ACT::WordID id;
  134. if ((utf8_word = uni_strupr8c(word)) == NULL)
  135. return 0;
  136. id = CaseSearch(utf8_word);
  137. OP_DELETEA(utf8_word);
  138. return id;
  139. }
  140. ACT::WordID ACT::Search(const char *utf8_word)
  141. {
  142. char *up8_word;
  143. ACT::WordID id;
  144. if ((up8_word = strupr8c(utf8_word)) == NULL)
  145. return 0;
  146. id = CaseSearch(up8_word);
  147. OP_DELETEA(up8_word);
  148. return id;
  149. }
  150. ACT::WordID ACT::CaseSearch(const uni_char *word)
  151. {
  152. char *utf8_word;
  153. ACT::WordID id;
  154. if ((utf8_word = uni_str8c(word)) == NULL)
  155. return 0;
  156. id = CaseSearch(utf8_word);
  157. OP_DELETEA(utf8_word);
  158. return id;
  159. }
  160. int ACT::PrefixSearch(WordID *result, const uni_char *prefix, int max_results)
  161. {
  162. char *utf8_word;
  163. int count;
  164. if ((utf8_word = uni_strupr8c(prefix)) == NULL)
  165. return 0;
  166. count = PrefixCaseSearch(result, utf8_word, max_results);
  167. OP_DELETEA(utf8_word);
  168. return count;
  169. }
  170. int ACT::PrefixSearch(WordID *result, const char *utf8_prefix, int max_results)
  171. {
  172. char *up8_word;
  173. int count;
  174. if ((up8_word = strupr8c(utf8_prefix)) == NULL)
  175. return 0;
  176. count = PrefixCaseSearch(result, up8_word, max_results);
  177. OP_DELETEA(up8_word);
  178. return count;
  179. }
  180. int ACT::PrefixCaseSearch(WordID *result, const uni_char *prefix, int max_results)
  181. {
  182. char *utf8_word;
  183. int count;
  184. if ((utf8_word = uni_str8c(prefix)) == NULL)
  185. return 0;
  186. count = PrefixCaseSearch(result, utf8_word, max_results);
  187. OP_DELETEA(utf8_word);
  188. return count;
  189. }
  190. int ACT::PrefixWords(uni_char **result, const uni_char *prefix, int max_results)
  191. {
  192. char *utf8_word;
  193. int i, j, count, len, prefix_len;
  194. char **result8;
  195. if ((utf8_word = uni_strupr8c(prefix)) == NULL)
  196. return 0;
  197. if ((result8 = OP_NEWA(char *, max_results)) == NULL)
  198. {
  199. OP_DELETEA(utf8_word);
  200. return 0;
  201. }
  202. count = PrefixCaseWords(result8, utf8_word, max_results);
  203. OP_DELETEA(utf8_word);
  204. prefix_len = (int)uni_strlen(prefix);
  205. for (i = 0; i < count; ++i)
  206. {
  207. len = (int)op_strlen(result8[i]);
  208. if ((result[i] = OP_NEWA(uni_char, len + 1)) == NULL)
  209. {
  210. for (j = 0; j < i; ++j)
  211. OP_DELETEA(result[j]);
  212. for (j = 0; j < count; ++j)
  213. OP_DELETEA(result8[j]);
  214. OP_DELETEA(result8);
  215. return 0;
  216. }
  217. if (prefix_len > 0 && uni_islower(prefix[prefix_len - 1]))
  218. {
  219. uni_strncpy(result[i], prefix, prefix_len);
  220. from_utf8(result[i] + prefix_len, result8[i] + prefix_len, sizeof(uni_char) * (len + 1 - prefix_len));
  221. uni_strlwr(result[i] + prefix_len);
  222. }
  223. else from_utf8(result[i], result8[i], sizeof(uni_char) * (len + 1));
  224. }
  225. for (j = 0; j < count; ++j)
  226. OP_DELETEA(result8[j]);
  227. OP_DELETEA(result8);
  228. return count;
  229. }
  230. int ACT::PrefixWords(char **result, const char *utf8_prefix, int max_results)
  231. {
  232. char *up8_word;
  233. int i, count, len;
  234. uni_char last_char[3]; /* ARRAY OK 2010-09-24 roarl */
  235. if ((up8_word = strupr8c(utf8_prefix)) == NULL)
  236. return 0;
  237. count = PrefixCaseWords(result, up8_word, max_results);
  238. OP_DELETEA(up8_word);
  239. if (*utf8_prefix != 0)
  240. {
  241. len = (int)op_strlen(utf8_prefix);
  242. i = len - 1;
  243. while ((utf8_prefix[i] & 0x80) != 0 && i > 0 && (utf8_prefix[i - 1] & 0x80) != 0)
  244. --i;
  245. from_utf8(last_char, utf8_prefix + i, 3 * sizeof(uni_char));
  246. if (uni_islower(*last_char))
  247. {
  248. for (i = 0; i < count; ++i)
  249. {
  250. op_strncpy(result[i], utf8_prefix, len);
  251. strlwr8(result[i] + len);
  252. }
  253. }
  254. else {
  255. for (i = 0; i < count; ++i)
  256. op_strncpy(result[i], utf8_prefix, len);
  257. }
  258. }
  259. return count;
  260. }
  261. int ACT::PrefixCaseWords(uni_char **result, const uni_char *prefix, int max_results)
  262. {
  263. char *utf8_word;
  264. int i, j, count, len;
  265. char **result8;
  266. if ((utf8_word = uni_str8c(prefix)) == NULL)
  267. return 0;
  268. if ((result8 = OP_NEWA(char *, max_results)) == NULL)
  269. {
  270. OP_DELETEA(utf8_word);
  271. return 0;
  272. }
  273. count = PrefixCaseWords(result8, utf8_word, max_results);
  274. OP_DELETEA(utf8_word);
  275. for (i = 0; i < count; ++i)
  276. {
  277. len = (int)op_strlen(result8[i]) + 1;
  278. if ((result[i] = OP_NEWA(uni_char, len)) == NULL)
  279. {
  280. for (j = 0; j < i; ++j)
  281. OP_DELETEA(result[j]);
  282. for (j = 0; j < count; ++j)
  283. OP_DELETEA(result8[j]);
  284. OP_DELETEA(result8);
  285. return 0;
  286. }
  287. from_utf8(result[i], result8[i], sizeof(uni_char) * len);
  288. }
  289. for (j = 0; j < count; ++j)
  290. OP_DELETEA(result8[j]);
  291. OP_DELETEA(result8);
  292. return count;
  293. }
  294. SearchIterator<ACT::PrefixResult> *ACT::PrefixSearch(const uni_char *prefix, BOOL single_word)
  295. {
  296. char *utf8_word;
  297. SearchIterator<ACT::PrefixResult> *it;
  298. if ((utf8_word = uni_strupr8c(prefix)) == NULL)
  299. return 0;
  300. it = PrefixCaseSearch(utf8_word, single_word);
  301. OP_DELETEA(utf8_word);
  302. return it;
  303. }
  304. SearchIterator<ACT::PrefixResult> *ACT::PrefixSearch(const char *utf8_prefix, BOOL single_word)
  305. {
  306. char *up8_word;
  307. SearchIterator<ACT::PrefixResult> *it;
  308. if ((up8_word = strupr8c(utf8_prefix)) == NULL)
  309. return 0;
  310. it = PrefixCaseSearch(up8_word, single_word);
  311. OP_DELETEA(up8_word);
  312. return it;
  313. }
  314. SearchIterator<ACT::PrefixResult> *ACT::PrefixCaseSearch(const uni_char *prefix, BOOL single_word)
  315. {
  316. char *utf8_word;
  317. SearchIterator<ACT::PrefixResult> *it;
  318. if ((utf8_word = uni_str8c(prefix)) == NULL)
  319. return 0;
  320. it = PrefixCaseSearch(utf8_word, single_word);
  321. OP_DELETEA(utf8_word);
  322. return it;
  323. }
  324. #endif // SEARCH_ENGINE