summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author jorlow@chromium.org <jorlow@chromium.org@62dab493-f737-651d-591e-8d6aee1b9529> 2011-03-28 20:43:44 +0000
committer jorlow@chromium.org <jorlow@chromium.org@62dab493-f737-651d-591e-8d6aee1b9529> 2011-03-28 20:43:44 +0000
commit e2da744e1241fec301351ca34730877e6cff722a (patch)
tree 7e8e474bb5a8f038cf9e69e591529ee614d939d9
parent e11bdf1935bc5a46db790ef414110149009f8c6a (diff)
download leveldb-e2da744e1241fec301351ca34730877e6cff722a.tar.gz
Upstream changes.
git-svn-id: https://leveldb.googlecode.com/svn/trunk@16 62dab493-f737-651d-591e-8d6aee1b9529
-rw-r--r-- db/builder.cc 4
-rw-r--r-- db/db_bench.cc 4
-rw-r--r-- db/db_impl.cc 4
-rw-r--r-- db/db_iter.cc 5
-rw-r--r-- db/log_reader.cc 6
-rw-r--r-- db/log_writer.cc 11
-rw-r--r-- db/repair.cc 2
-rw-r--r-- db/table_cache.cc 3
-rw-r--r-- db/table_cache.h 15
-rw-r--r-- db/version_set.cc 22
-rw-r--r-- doc/log_format.txt 15
-rw-r--r-- include/env.h 3
-rw-r--r-- include/table.h 6
-rw-r--r-- table/table.cc 2
-rw-r--r-- table/table_test.cc 4
-rw-r--r-- util/env_chromium.cc 16
-rw-r--r-- util/env_posix.cc 16
17 files changed, 67 insertions, 71 deletions
diff --git a/db/builder.cc b/db/builder.cc
index f3d0fe2..d5585c3 100644
--- a/db/builder.cc
+++ b/db/builder.cc
@@ -74,7 +74,9 @@ Status BuildTable(const std::string& dbname,
if (s.ok()) {
// Verify that the table is usable
- Iterator* it = table_cache->NewIterator(ReadOptions(), meta->number);
+ Iterator* it = table_cache->NewIterator(ReadOptions(),
+ meta->number,
+ meta->file_size);
s = it->status();
delete it;
}
diff --git a/db/db_bench.cc b/db/db_bench.cc
index 7026ca1..c7a662d 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -354,7 +354,7 @@ class Benchmark {
private:
void Crc32c(int size, const char* label) {
// Checksum about 500MB of data total
- string data(size, 'x');
+ std::string data(size, 'x');
int64_t bytes = 0;
uint32_t crc = 0;
while (bytes < 500 * 1048576) {
@@ -371,7 +371,7 @@ class Benchmark {
void SHA1(int size, const char* label) {
// SHA1 about 100MB of data total
- string data(size, 'x');
+ std::string data(size, 'x');
int64_t bytes = 0;
char sha1[20];
while (bytes < 100 * 1048576) {
diff --git a/db/db_impl.cc b/db/db_impl.cc
index 12c02b3..f14167a 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -642,7 +642,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
if (s.ok() && current_entries > 0) {
// Verify that the table is usable
- Iterator* iter = table_cache_->NewIterator(ReadOptions(),output_number);
+ Iterator* iter = table_cache_->NewIterator(ReadOptions(),
+ output_number,
+ current_bytes);
s = iter->status();
delete iter;
if (s.ok()) {
diff --git a/db/db_iter.cc b/db/db_iter.cc
index 6726b51..beb4d74 100644
--- a/db/db_iter.cc
+++ b/db/db_iter.cc
@@ -340,8 +340,11 @@ void DBIter::ReadIndirectValue(Slice ref) const {
std::string fname = LargeValueFileName(*dbname_, large_ref);
RandomAccessFile* file;
Status s = env_->NewRandomAccessFile(fname, &file);
+ uint64_t file_size = 0;
+ if (s.ok()) {
+ s = env_->GetFileSize(fname, &file_size);
+ }
if (s.ok()) {
- uint64_t file_size = file->Size();
uint64_t value_size = large_ref.ValueSize();
large_->value.resize(value_size);
Slice result;
diff --git a/db/log_reader.cc b/db/log_reader.cc
index 39a6d2b..407700d 100644
--- a/db/log_reader.cc
+++ b/db/log_reader.cc
@@ -105,7 +105,7 @@ void Reader::ReportDrop(size_t bytes, const char* reason) {
unsigned int Reader::ReadPhysicalRecord(Slice* result) {
while (true) {
- if (buffer_.size() <= kHeaderSize) {
+ if (buffer_.size() < kHeaderSize) {
if (!eof_) {
// Last read was a full read, so this is a trailer to skip
buffer_.clear();
@@ -124,12 +124,10 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result) {
} else if (buffer_.size() == 0) {
// End of file
return kEof;
- } else if (buffer_.size() < kHeaderSize) {
+ } else {
ReportDrop(buffer_.size(), "truncated record at end of file");
buffer_.clear();
return kEof;
- } else {
- // We have a trailing zero-length record. Fall through and check it.
}
}
diff --git a/db/log_writer.cc b/db/log_writer.cc
index 465eca2..fc33e6e 100644
--- a/db/log_writer.cc
+++ b/db/log_writer.cc
@@ -35,18 +35,19 @@ Status Writer::AddRecord(const Slice& slice) {
do {
const int leftover = kBlockSize - block_offset_;
assert(leftover >= 0);
- if (leftover <= kHeaderSize) {
+ if (leftover < kHeaderSize) {
// Switch to a new block
if (leftover > 0) {
- // Fill the trailer
- dest_->Append(Slice("\x00\x00\x00\x00\x00\x00\x00", leftover));
+ // Fill the trailer (literal below relies on kHeaderSize being 7)
+ assert(kHeaderSize == 7);
+ dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
}
block_offset_ = 0;
}
- // Invariant: we never leave <= kHeaderSize bytes in a block.
+ // Invariant: we never leave < kHeaderSize bytes in a block.
const int avail = kBlockSize - block_offset_ - kHeaderSize;
- assert(avail > 0);
+ assert(avail >= 0);
const size_t fragment_length = (left < avail) ? left : avail;
diff --git a/db/repair.cc b/db/repair.cc
index 0727914..745b31a 100644
--- a/db/repair.cc
+++ b/db/repair.cc
@@ -261,7 +261,7 @@ class Repairer {
Status status = env_->GetFileSize(fname, &t->meta.file_size);
if (status.ok()) {
Iterator* iter = table_cache_->NewIterator(
- ReadOptions(), t->meta.number);
+ ReadOptions(), t->meta.number, t->meta.file_size);
bool empty = true;
ParsedInternalKey parsed;
t->max_sequence = 0;
diff --git a/db/table_cache.cc b/db/table_cache.cc
index 604298d..6f750d6 100644
--- a/db/table_cache.cc
+++ b/db/table_cache.cc
@@ -44,6 +44,7 @@ TableCache::~TableCache() {
Iterator* TableCache::NewIterator(const ReadOptions& options,
uint64_t file_number,
+ uint64_t file_size,
Table** tableptr) {
if (tableptr != NULL) {
*tableptr = NULL;
@@ -59,7 +60,7 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
Table* table = NULL;
Status s = env_->NewRandomAccessFile(fname, &file);
if (s.ok()) {
- s = Table::Open(*options_, file, &table);
+ s = Table::Open(*options_, file, file_size, &table);
}
if (!s.ok()) {
diff --git a/db/table_cache.h b/db/table_cache.h
index 6c357df..5564dfc 100644
--- a/db/table_cache.h
+++ b/db/table_cache.h
@@ -23,15 +23,16 @@ class TableCache {
TableCache(const std::string& dbname, const Options* options, int entries);
~TableCache();
- // Get an iterator for the specified file number and return it. If
- // "tableptr" is non-NULL, also sets "*tableptr" to point to the
- // Table object underlying the returned iterator, or NULL if no
- // Table object underlies the returned iterator. The returned
- // "*tableptr" object is owned by the cache and should not be
- // deleted, and is valid for as long as the returned iterator is
- // live.
+ // Return an iterator for the specified file number (the corresponding
+ // file length must be exactly "file_size" bytes). If "tableptr" is
+ // non-NULL, also sets "*tableptr" to point to the Table object
+ // underlying the returned iterator, or NULL if no Table object underlies
+ // the returned iterator. The returned "*tableptr" object is owned by
+ // the cache and should not be deleted, and is valid for as long as the
+ // returned iterator is live.
Iterator* NewIterator(const ReadOptions& options,
uint64_t file_number,
+ uint64_t file_size,
Table** tableptr = NULL);
// Evict any entry for the specified file number
diff --git a/db/version_set.cc b/db/version_set.cc
index caf0b2d..b826e5b 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -75,8 +75,8 @@ Version::~Version() {
// An internal iterator. For a given version/level pair, yields
// information about the files in the level. For a given entry, key()
// is the largest key that occurs in the file, and value() is an
-// 8-byte value containing the file number of the file, encoding using
-// EncodeFixed64.
+// 16-byte value containing the file number and file size, both
+// encoded using EncodeFixed64.
class Version::LevelFileNumIterator : public Iterator {
public:
LevelFileNumIterator(const Version* version,
@@ -129,6 +129,7 @@ class Version::LevelFileNumIterator : public Iterator {
Slice value() const {
assert(Valid());
EncodeFixed64(value_buf_, (*flist_)[index_]->number);
+ EncodeFixed64(value_buf_+8, (*flist_)[index_]->file_size);
return Slice(value_buf_, sizeof(value_buf_));
}
virtual Status status() const { return Status::OK(); }
@@ -137,18 +138,21 @@ class Version::LevelFileNumIterator : public Iterator {
const std::vector<FileMetaData*>* const flist_;
int index_;
- mutable char value_buf_[8]; // Used for encoding the file number for value()
+ // Backing store for value(). Holds the file number and size.
+ mutable char value_buf_[16];
};
static Iterator* GetFileIterator(void* arg,
const ReadOptions& options,
const Slice& file_value) {
TableCache* cache = reinterpret_cast<TableCache*>(arg);
- if (file_value.size() != 8) {
+ if (file_value.size() != 16) {
return NewErrorIterator(
Status::Corruption("FileReader invoked with unexpected value"));
} else {
- return cache->NewIterator(options, DecodeFixed64(file_value.data()));
+ return cache->NewIterator(options,
+ DecodeFixed64(file_value.data()),
+ DecodeFixed64(file_value.data() + 8));
}
}
@@ -164,7 +168,8 @@ void Version::AddIterators(const ReadOptions& options,
// Merge all level zero files together since they may overlap
for (int i = 0; i < files_[0].size(); i++) {
iters->push_back(
- vset_->table_cache_->NewIterator(options, files_[0][i]->number));
+ vset_->table_cache_->NewIterator(
+ options, files_[0][i]->number, files_[0][i]->file_size));
}
// For levels > 0, we can use a concatenating iterator that sequentially
@@ -650,7 +655,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
// approximate offset of "ikey" within the table.
Table* tableptr;
Iterator* iter = table_cache_->NewIterator(
- ReadOptions(), files[i]->number, &tableptr);
+ ReadOptions(), files[i]->number, files[i]->file_size, &tableptr);
if (tableptr != NULL) {
result += tableptr->ApproximateOffsetOf(ikey.Encode());
}
@@ -855,7 +860,8 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
if (c->level() + which == 0) {
const std::vector<FileMetaData*>& files = c->inputs_[which];
for (int i = 0; i < files.size(); i++) {
- list[num++] = table_cache_->NewIterator(options, files[i]->number);
+ list[num++] = table_cache_->NewIterator(
+ options, files[i]->number, files[i]->file_size);
}
} else {
// Create concatenating iterator for the files from this level
diff --git a/doc/log_format.txt b/doc/log_format.txt
index 9a801d4..3a0414b 100644
--- a/doc/log_format.txt
+++ b/doc/log_format.txt
@@ -9,12 +9,15 @@ Each block consists of a sequence of records:
type: uint8 // One of FULL, FIRST, MIDDLE, LAST
data: uint8[length]
-A record never starts within the last seven bytes of a block. Any
-leftover bytes here form the trailer, which must consist entirely of
-zero bytes and must be skipped by readers. In particular, even if
-there are exactly seven bytes left in the block, and a zero-length
-user record is added (which will fit in these seven bytes), the writer
-must skip these trailer bytes and add the record to the next block.
+A record never starts within the last six bytes of a block (since it
+won't fit). Any leftover bytes here form the trailer, which must
+consist entirely of zero bytes and must be skipped by readers.
+
+Aside: if exactly seven bytes are left in the current block, and a new
+non-zero length record is added, the writer must emit a FIRST record
+(which contains zero bytes of user data) to fill up the trailing seven
+bytes of the block and then emit all of the user data in subsequent
+blocks.
More types may be added in the future. Some Readers may skip record
types they do not understand, others may report that some data was
diff --git a/include/env.h b/include/env.h
index a728f29..14ddf29 100644
--- a/include/env.h
+++ b/include/env.h
@@ -168,9 +168,6 @@ class RandomAccessFile {
RandomAccessFile() { }
virtual ~RandomAccessFile();
- // Return the length of this file in bytes.
- virtual uint64_t Size() const = 0;
-
// Read up to "n" bytes from the file starting at "offset".
// "scratch[0..n-1]" may be written by this routine. Sets "*result"
// to the data that was read (including if fewer than "n" bytes were
diff --git a/include/table.h b/include/table.h
index 96b2196..c2a4cf9 100644
--- a/include/table.h
+++ b/include/table.h
@@ -20,8 +20,9 @@ struct ReadOptions;
// immutable and persistent.
class Table {
public:
- // Attempt to open the table that is stored in "file", and read the
- // metadata entries necessary to allow retrieving data from the table.
+ // Attempt to open the table that is stored in bytes [0..file_size)
+ // of "file", and read the metadata entries necessary to allow
+ // retrieving data from the table.
//
// If successful, returns ok and sets "*table" to the newly opened
// table. The client should delete "*table" when no longer needed.
@@ -33,6 +34,7 @@ class Table {
// *file must remain live while this Table is in use.
static Status Open(const Options& options,
RandomAccessFile* file,
+ uint64_t file_size,
Table** table);
~Table();
diff --git a/table/table.cc b/table/table.cc
index dffc217..bd0fbb5 100644
--- a/table/table.cc
+++ b/table/table.cc
@@ -29,9 +29,9 @@ struct Table::Rep {
Status Table::Open(const Options& options,
RandomAccessFile* file,
+ uint64_t size,
Table** table) {
*table = NULL;
- const uint64_t size = file->Size();
if (size < Footer::kEncodedLength) {
return Status::InvalidArgument("file is too short to be an sstable");
}
diff --git a/table/table_test.cc b/table/table_test.cc
index eabb257..d67c58b 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -110,7 +110,7 @@ class StringSource: public RandomAccessFile {
virtual ~StringSource() { }
- virtual uint64_t Size() const { return contents_.size(); }
+ uint64_t Size() const { return contents_.size(); }
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
@@ -246,7 +246,7 @@ class TableConstructor: public Constructor {
source_ = new StringSource(sink.contents());
Options table_options;
table_options.comparator = options.comparator;
- return Table::Open(table_options, source_, &table_);
+ return Table::Open(table_options, source_, sink.contents().size(), &table_);
}
virtual size_t NumBytes() const { return source_->Size(); }
diff --git a/util/env_chromium.cc b/util/env_chromium.cc
index d79406f..834ec2d 100644
--- a/util/env_chromium.cc
+++ b/util/env_chromium.cc
@@ -144,17 +144,13 @@ class ChromiumSequentialFile: public SequentialFile {
class ChromiumRandomAccessFile: public RandomAccessFile {
private:
std::string filename_;
- uint64_t size_;
::base::PlatformFile file_;
public:
- ChromiumRandomAccessFile(const std::string& fname, uint64_t size,
- ::base::PlatformFile file)
- : filename_(fname), size_(size), file_(file) { }
+ ChromiumRandomAccessFile(const std::string& fname, ::base::PlatformFile file)
+ : filename_(fname), file_(file) { }
virtual ~ChromiumRandomAccessFile() { ::base::ClosePlatformFile(file_); }
- virtual uint64_t Size() const { return size_; }
-
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
Status s;
@@ -256,13 +252,7 @@ class ChromiumEnv : public Env {
*result = NULL;
return Status::IOError(fname, PlatformFileErrorString(error_code));
}
- ::base::PlatformFileInfo info;
- if (!::base::GetPlatformFileInfo(file, &info)) {
- *result = NULL;
- ::base::ClosePlatformFile(file);
- return Status::IOError(fname, PlatformFileErrorString(error_code));
- }
- *result = new ChromiumRandomAccessFile(fname, info.size, file);
+ *result = new ChromiumRandomAccessFile(fname, file);
return Status::OK();
}
diff --git a/util/env_posix.cc b/util/env_posix.cc
index f5174d3..5c58449 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -57,16 +57,13 @@ class PosixSequentialFile: public SequentialFile {
class PosixRandomAccessFile: public RandomAccessFile {
private:
std::string filename_;
- uint64_t size_;
int fd_;
public:
- PosixRandomAccessFile(const std::string& fname, uint64_t size, int fd)
- : filename_(fname), size_(size), fd_(fd) { }
+ PosixRandomAccessFile(const std::string& fname, int fd)
+ : filename_(fname), fd_(fd) { }
virtual ~PosixRandomAccessFile() { close(fd_); }
- virtual uint64_t Size() const { return size_; }
-
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
Status s;
@@ -286,14 +283,7 @@ class PosixEnv : public Env {
*result = NULL;
return Status::IOError(fname, strerror(errno));
}
- struct stat sbuf;
- if (fstat(fd, &sbuf) != 0) {
- *result = NULL;
- Status s = Status::IOError(fname, strerror(errno));
- close(fd);
- return s;
- }
- *result = new PosixRandomAccessFile(fname, sbuf.st_size, fd);
+ *result = new PosixRandomAccessFile(fname, fd);
return Status::OK();
}