Opera 12.15 Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

BinCompressor.cpp 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. /* -*- Mode: c++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*-
  2. **
  3. ** Copyright (C) 1995-2011 Opera Software ASA. All rights reserved.
  4. **
  5. ** This file is part of the Opera web browser.
  6. ** It may not be distributed under any circumstances.
  7. */
  8. #include "core/pch.h"
  9. #ifdef SEARCH_ENGINE
  10. #include "modules/search_engine/BinCompressor.h"
  11. #define DICT_SIZE 0x4000
  12. #define DICT_MASK 0x3FFF
  13. // Modified Bernstein hash (experimentally as good as FNV-1a hash)
  14. #define hash(cp) (((((((cp[0] * 0x21) ^ cp[1]) * 0x21) ^ cp[2]) * 0x21) ^ cp[3]) & DICT_MASK)
  15. OP_STATUS BinCompressor::InitCompDict(void)
  16. {
  17. OP_ASSERT(m_dict == NULL); // it is not necessary to init the dictionary several times
  18. FreeCompDict();
  19. RETURN_OOM_IF_NULL(m_dict = OP_NEWA(UINT32, DICT_SIZE));
  20. #ifdef VALGRIND
  21. // Mark 'm_dict' as defined, even though it is not, since the
  22. // algorithm is not sensitive to the initial values for correct
  23. // operation.
  24. op_valgrind_set_defined(m_dict, DICT_SIZE*sizeof(UINT32));
  25. #endif
  26. return OpStatus::OK;
  27. }
  28. unsigned BinCompressor::Compress(unsigned char *dst, const void *_src, unsigned len)
  29. {
  30. const unsigned char *src = (const unsigned char*)_src, *sp, *cp, *end, *match_end;
  31. unsigned char *op;
  32. int i, di;
  33. const unsigned char *dptr;
  34. if (len < 8)
  35. {
  36. dst[0] = len;
  37. dst[1] = 0;
  38. dst[2] = 0;
  39. dst[3] = 0;
  40. if (len == 0)
  41. return 4;
  42. return (unsigned)(OutputLiteral(dst + 4, src, src + len) - dst);
  43. }
  44. sp = src;
  45. cp = src;
  46. end = cp + len;
  47. match_end = end - 4;
  48. op = dst + 4;
  49. while (cp < match_end)
  50. {
  51. di = hash(cp);
  52. dptr = src+m_dict[di];
  53. m_dict[di] = (UINT32)(cp-src);
  54. if (dptr >= src && dptr < cp && cp - dptr <= 0xFFFF &&
  55. dptr[0] == cp[0] && dptr[1] == cp[1] && dptr[2] == cp[2] && dptr[3] == cp[3])
  56. {
  57. // match found
  58. if (cp > sp)
  59. op = OutputLiteral(op, sp, cp);
  60. sp = cp;
  61. cp += 4;
  62. i = 4;
  63. while (cp < end && *cp == dptr[i])
  64. {
  65. ++cp;
  66. ++i;
  67. }
  68. op = OutputMatch(op, (unsigned)(cp - sp), (unsigned short)(sp - dptr));
  69. sp++;
  70. while (sp < cp && sp < match_end)
  71. {
  72. m_dict[hash(sp)] = (UINT32)(sp-src);
  73. ++sp;
  74. }
  75. sp = cp;
  76. }
  77. else {
  78. ++cp;
  79. }
  80. }
  81. cp = end;
  82. if (cp > sp)
  83. op = OutputLiteral(op, sp, cp);
  84. i = (int)(cp - src);
  85. dst[0] = i & 0xFF;
  86. dst[1] = (i >> 8) & 0xFF;
  87. dst[2] = (i >> 16) & 0xFF;
  88. dst[3] = (i >> 24) & 0xFF;
  89. return (unsigned)(op - dst);
  90. }
  91. unsigned BinCompressor::Decompress(void *_dst, const unsigned char *src, unsigned len)
  92. {
  93. const unsigned char *cp;
  94. unsigned char *dst = (unsigned char *)_dst, *op;
  95. register unsigned char *shiftp;
  96. int c;
  97. int lit_len, max_len;
  98. int shift;
  99. if (!dst || !src || len < 4)
  100. return 0;
  101. max_len = (int)Length(src);
  102. cp = src + 4;
  103. op = dst;
  104. src += len;
  105. while (cp < src && op - dst < max_len)
  106. {
  107. if ((*cp & 0x40) == 0) // literal
  108. {
  109. // length
  110. lit_len = *cp & 0x3F;
  111. shift = 6;
  112. while ((*cp++ & 0x80) != 0)
  113. {
  114. if (shift > 30 || cp >= src)
  115. return 0;
  116. lit_len |= (*cp & 0x7F) << shift;
  117. shift += 7;
  118. }
  119. if (lit_len == 0 ||
  120. op - dst + lit_len > max_len ||
  121. cp + lit_len > src)
  122. return 0;
  123. // differences
  124. while (lit_len-- > 0)
  125. *op++ = *cp++;
  126. }
  127. else { // match
  128. // length
  129. lit_len = *cp & 0x1F;
  130. c = (*cp & 0x20) == 0;
  131. shift = 5;
  132. while ((*cp++ & 0x80) != 0)
  133. {
  134. if (shift > 30 || cp >= src)
  135. return 0;
  136. lit_len |= (*cp & 0x7F) << shift;
  137. shift += 7;
  138. }
  139. lit_len += 4;
  140. if (cp >= src || (c && cp+1 >= src))
  141. return 0;
  142. shift = *cp++;
  143. if (c)
  144. shift |= ((int)*cp++) << 8;
  145. shiftp = op - shift;
  146. if (shiftp < dst || shiftp >= op || op - dst + lit_len > max_len)
  147. return 0;
  148. while (lit_len-- > 0)
  149. *op++ = *shiftp++;
  150. }
  151. }
  152. return (unsigned)(op - dst);
  153. }
  154. unsigned BinCompressor::Length(const unsigned char *src)
  155. {
  156. return (unsigned)(src[0] | ((int)src[1]) << 8 | ((int)src[2]) << 16 | ((int)src[3]) << 24);
  157. }
  158. unsigned char *BinCompressor::OutputLiteral(unsigned char *op, const unsigned char *sp, const unsigned char *cp)
  159. {
  160. int length;
  161. OP_ASSERT(cp - sp > 0);
  162. length = (int)(cp - sp);
  163. *(op++) = (unsigned char)(length & 0x3F) | ((length > 0x3F) << 7);
  164. length >>= 6;
  165. while (length > 0)
  166. {
  167. *(op++) = (unsigned char)(length & 0x7F) | ((length > 0x7F) << 7);
  168. length >>= 7;
  169. }
  170. while (sp < cp)
  171. *op++ = *sp++;
  172. return op;
  173. }
  174. unsigned char *BinCompressor::OutputMatch(unsigned char *op, unsigned length, unsigned short offset)
  175. {
  176. length -= 4;
  177. *(op++) = (unsigned char)(length & 0x1F) | ((length > 0x1F) << 7) | 0x40 | ((offset <= 0xFF) << 5);
  178. length >>= 5;
  179. while (length > 0)
  180. {
  181. *(op++) = (unsigned char)(length & 0x7F) | ((length > 0x7F) << 7);
  182. length >>= 7;
  183. }
  184. if (offset <= 0xFF)
  185. *op++ = (unsigned char)offset;
  186. else
  187. {
  188. *op++ = (unsigned char)(offset & 0xFF);
  189. *op++ = (unsigned char)(offset >> 8);
  190. }
  191. return op;
  192. }
  193. #undef DICT_SIZE
  194. #undef DICT_MASK
  195. #undef hash
  196. #endif // SEARCH_ENGINE