You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

log_reader.cc 8.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "db/log_reader.h"
  5. #include <stdio.h>
  6. #include "leveldb/env.h"
  7. #include "util/coding.h"
  8. #include "util/crc32c.h"
  9. namespace leveldb {
  10. namespace log {
  11. Reader::Reporter::~Reporter() {
  12. }
  13. Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
  14. uint64_t initial_offset)
  15. : file_(file),
  16. reporter_(reporter),
  17. checksum_(checksum),
  18. backing_store_(new char[kBlockSize]),
  19. buffer_(),
  20. eof_(false),
  21. last_record_offset_(0),
  22. end_of_buffer_offset_(0),
  23. initial_offset_(initial_offset),
  24. resyncing_(initial_offset > 0) {
  25. }
  26. Reader::~Reader() {
  27. delete[] backing_store_;
  28. }
  29. bool Reader::SkipToInitialBlock() {
  30. size_t offset_in_block = initial_offset_ % kBlockSize;
  31. uint64_t block_start_location = initial_offset_ - offset_in_block;
  32. // Don't search a block if we'd be in the trailer
  33. if (offset_in_block > kBlockSize - 6) {
  34. offset_in_block = 0;
  35. block_start_location += kBlockSize;
  36. }
  37. end_of_buffer_offset_ = block_start_location;
  38. // Skip to start of first block that can contain the initial record
  39. if (block_start_location > 0) {
  40. Status skip_status = file_->Skip(block_start_location);
  41. if (!skip_status.ok()) {
  42. ReportDrop(block_start_location, skip_status);
  43. return false;
  44. }
  45. }
  46. return true;
  47. }
  48. bool Reader::ReadRecord(Slice* record, std::string* scratch) {
  49. if (last_record_offset_ < initial_offset_) {
  50. if (!SkipToInitialBlock()) {
  51. return false;
  52. }
  53. }
  54. scratch->clear();
  55. record->clear();
  56. bool in_fragmented_record = false;
  57. // Record offset of the logical record that we're reading
  58. // 0 is a dummy value to make compilers happy
  59. uint64_t prospective_record_offset = 0;
  60. Slice fragment;
  61. while (true) {
  62. const unsigned int record_type = ReadPhysicalRecord(&fragment);
  63. // ReadPhysicalRecord may have only had an empty trailer remaining in its
  64. // internal buffer. Calculate the offset of the next physical record now
  65. // that it has returned, properly accounting for its header size.
  66. uint64_t physical_record_offset =
  67. end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size();
  68. if (resyncing_) {
  69. if (record_type == kMiddleType) {
  70. continue;
  71. } else if (record_type == kLastType) {
  72. resyncing_ = false;
  73. continue;
  74. } else {
  75. resyncing_ = false;
  76. }
  77. }
  78. switch (record_type) {
  79. case kFullType:
  80. if (in_fragmented_record) {
  81. // Handle bug in earlier versions of log::Writer where
  82. // it could emit an empty kFirstType record at the tail end
  83. // of a block followed by a kFullType or kFirstType record
  84. // at the beginning of the next block.
  85. if (scratch->empty()) {
  86. in_fragmented_record = false;
  87. } else {
  88. ReportCorruption(scratch->size(), "partial record without end(1)");
  89. }
  90. }
  91. prospective_record_offset = physical_record_offset;
  92. scratch->clear();
  93. *record = fragment;
  94. last_record_offset_ = prospective_record_offset;
  95. return true;
  96. case kFirstType:
  97. if (in_fragmented_record) {
  98. // Handle bug in earlier versions of log::Writer where
  99. // it could emit an empty kFirstType record at the tail end
  100. // of a block followed by a kFullType or kFirstType record
  101. // at the beginning of the next block.
  102. if (scratch->empty()) {
  103. in_fragmented_record = false;
  104. } else {
  105. ReportCorruption(scratch->size(), "partial record without end(2)");
  106. }
  107. }
  108. prospective_record_offset = physical_record_offset;
  109. scratch->assign(fragment.data(), fragment.size());
  110. in_fragmented_record = true;
  111. break;
  112. case kMiddleType:
  113. if (!in_fragmented_record) {
  114. ReportCorruption(fragment.size(),
  115. "missing start of fragmented record(1)");
  116. } else {
  117. scratch->append(fragment.data(), fragment.size());
  118. }
  119. break;
  120. case kLastType:
  121. if (!in_fragmented_record) {
  122. ReportCorruption(fragment.size(),
  123. "missing start of fragmented record(2)");
  124. } else {
  125. scratch->append(fragment.data(), fragment.size());
  126. *record = Slice(*scratch);
  127. last_record_offset_ = prospective_record_offset;
  128. return true;
  129. }
  130. break;
  131. case kEof:
  132. if (in_fragmented_record) {
  133. // This can be caused by the writer dying immediately after
  134. // writing a physical record but before completing the next; don't
  135. // treat it as a corruption, just ignore the entire logical record.
  136. scratch->clear();
  137. }
  138. return false;
  139. case kBadRecord:
  140. if (in_fragmented_record) {
  141. ReportCorruption(scratch->size(), "error in middle of record");
  142. in_fragmented_record = false;
  143. scratch->clear();
  144. }
  145. break;
  146. default: {
  147. char buf[40];
  148. snprintf(buf, sizeof(buf), "unknown record type %u", record_type);
  149. ReportCorruption(
  150. (fragment.size() + (in_fragmented_record ? scratch->size() : 0)),
  151. buf);
  152. in_fragmented_record = false;
  153. scratch->clear();
  154. break;
  155. }
  156. }
  157. }
  158. return false;
  159. }
  160. uint64_t Reader::LastRecordOffset() {
  161. return last_record_offset_;
  162. }
  163. void Reader::ReportCorruption(uint64_t bytes, const char* reason) {
  164. ReportDrop(bytes, Status::Corruption(reason, file_->GetName()));
  165. }
  166. void Reader::ReportDrop(uint64_t bytes, const Status& reason) {
  167. if (reporter_ != NULL &&
  168. end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) {
  169. reporter_->Corruption(static_cast<size_t>(bytes), reason);
  170. }
  171. }
  172. unsigned int Reader::ReadPhysicalRecord(Slice* result) {
  173. while (true) {
  174. if (buffer_.size() < kHeaderSize) {
  175. if (!eof_) {
  176. // Last read was a full read, so this is a trailer to skip
  177. buffer_.clear();
  178. Status status = file_->Read(kBlockSize, &buffer_, backing_store_);
  179. end_of_buffer_offset_ += buffer_.size();
  180. if (!status.ok()) {
  181. buffer_.clear();
  182. ReportDrop(kBlockSize, status);
  183. eof_ = true;
  184. return kEof;
  185. } else if (buffer_.size() < kBlockSize) {
  186. eof_ = true;
  187. }
  188. continue;
  189. } else {
  190. // Note that if buffer_ is non-empty, we have a truncated header at the
  191. // end of the file, which can be caused by the writer crashing in the
  192. // middle of writing the header. Instead of considering this an error,
  193. // just report EOF.
  194. buffer_.clear();
  195. return kEof;
  196. }
  197. }
  198. // Parse the header
  199. const char* header = buffer_.data();
  200. const uint32_t a = static_cast<uint32_t>(header[4]) & 0xff;
  201. const uint32_t b = static_cast<uint32_t>(header[5]) & 0xff;
  202. const unsigned int type = header[6];
  203. const uint32_t length = a | (b << 8);
  204. if (kHeaderSize + length > buffer_.size()) {
  205. size_t drop_size = buffer_.size();
  206. buffer_.clear();
  207. if (!eof_) {
  208. ReportCorruption(drop_size, "bad record length");
  209. return kBadRecord;
  210. }
  211. // If the end of the file has been reached without reading |length| bytes
  212. // of payload, assume the writer died in the middle of writing the record.
  213. // Don't report a corruption.
  214. return kEof;
  215. }
  216. if (type == kZeroType && length == 0) {
  217. // Skip zero length record without reporting any drops since
  218. // such records are produced by the mmap based writing code in
  219. // env_posix.cc that preallocates file regions.
  220. buffer_.clear();
  221. return kBadRecord;
  222. }
  223. // Check crc
  224. if (checksum_) {
  225. uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header));
  226. uint32_t actual_crc = crc32c::Value(header + 6, 1 + length);
  227. if (actual_crc != expected_crc) {
  228. // Drop the rest of the buffer since "length" itself may have
  229. // been corrupted and if we trust it, we could find some
  230. // fragment of a real log record that just happens to look
  231. // like a valid log record.
  232. size_t drop_size = buffer_.size();
  233. buffer_.clear();
  234. ReportCorruption(drop_size, "checksum mismatch");
  235. return kBadRecord;
  236. }
  237. }
  238. buffer_.remove_prefix(kHeaderSize + length);
  239. // Skip physical record that started before initial_offset_
  240. if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length <
  241. initial_offset_) {
  242. result->clear();
  243. return kBadRecord;
  244. }
  245. *result = Slice(header + kHeaderSize, length);
  246. return type;
  247. }
  248. }
  249. } // namespace log
  250. } // namespace leveldb