From e11bdf1935bc5a46db790ef414110149009f8c6a Mon Sep 17 00:00:00 2001 From: "jorlow@chromium.org" Date: Fri, 25 Mar 2011 20:27:43 +0000 Subject: Upstream changes git-svn-id: https://leveldb.googlecode.com/svn/trunk@15 62dab493-f737-651d-591e-8d6aee1b9529 --- db/db_bench.cc | 55 +++++++- db/db_iter.cc | 418 +++++++++++++++++++++++++++------------------------------ db/db_test.cc | 184 +++++++++++++++++++++++++ 3 files changed, 435 insertions(+), 222 deletions(-) (limited to 'db') diff --git a/db/db_bench.cc b/db/db_bench.cc index 72e0699..7026ca1 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -11,6 +11,8 @@ #include "include/db.h" #include "include/env.h" #include "include/write_batch.h" +#include "port/port.h" +#include "util/crc32c.h" #include "util/histogram.h" #include "util/random.h" #include "util/testutil.h" @@ -25,6 +27,8 @@ // readseq -- read N values sequentially // readreverse -- read N values in reverse order // readrandom -- read N values in random order +// crc32c -- repeated crc32c of 4K of data +// sha1 -- repeated SHA1 computation over 4K of data // Meta operations: // compact -- Compact the entire DB // heapprofile -- Dump a heap profile (if supported by this port) @@ -34,17 +38,21 @@ // normal -- reset N back to its normal value (1000000) static const char* FLAGS_benchmarks = "fillseq," + "fillsync," "fillrandom," "overwrite," - "fillsync," + "readrandom," + "readrandom," // Extra run to allow previous compactions to quiesce "readseq," "readreverse," - "readrandom," "compact," + "readrandom," "readseq," "readreverse," - "readrandom," - "fill100K"; + "fill100K," + "crc32c," + "sha1" + ; // Number of key/values to place in database static int FLAGS_num = 1000000; @@ -330,6 +338,10 @@ class Benchmark { ReadRandom(); } else if (name == Slice("compact")) { Compact(); + } else if (name == Slice("crc32c")) { + Crc32c(4096, "(4K per op)"); + } else if (name == Slice("sha1")) { + SHA1(4096, "(4K per op)"); } else if (name == Slice("heapprofile")) { HeapProfile(); } else { @@ -340,6 +352,41 @@ class Benchmark { } private: + void Crc32c(int size, const char* label) { + // Checksum about 500MB of data total + string data(size, 'x'); + int64_t bytes = 0; + uint32_t crc = 0; + while (bytes < 500 * 1048576) { + crc = crc32c::Value(data.data(), size); + FinishedSingleOp(); + bytes += size; + } + // Print so result is not dead + fprintf(stderr, "... crc=0x%x\r", static_cast(crc)); + + bytes_ = bytes; + message_ = label; + } + + void SHA1(int size, const char* label) { + // SHA1 about 100MB of data total + string data(size, 'x'); + int64_t bytes = 0; + char sha1[20]; + while (bytes < 100 * 1048576) { + port::SHA1_Hash(data.data(), size, sha1); + FinishedSingleOp(); + bytes += size; + } + + // Print so result is not dead + fprintf(stderr, "... sha1=%02x...\r", static_cast(sha1[0])); + + bytes_ = bytes; + message_ = label; + } + void Open() { assert(db_ == NULL); Options options; diff --git a/db/db_iter.cc b/db/db_iter.cc index 165d7d4..6726b51 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -36,6 +36,16 @@ namespace { // numbers, deletion markers, overwrites, etc. class DBIter: public Iterator { public: + // Which direction is the iterator currently moving? + // (1) When moving forward, the internal iterator is positioned at + // the exact entry that yields this->key(), this->value() + // (2) When moving backwards, the internal iterator is positioned + // just before all entries whose user key == this->key(). + enum Direction { + kForward, + kReverse + }; + DBIter(const std::string* dbname, Env* env, const Comparator* cmp, Iterator* iter, SequenceNumber s) : dbname_(dbname), @@ -44,6 +54,7 @@ class DBIter: public Iterator { iter_(iter), sequence_(s), large_(NULL), + direction_(kForward), valid_(false) { } virtual ~DBIter() { @@ -53,48 +64,21 @@ class DBIter: public Iterator { virtual bool Valid() const { return valid_; } virtual Slice key() const { assert(valid_); - return key_; + return (direction_ == kForward) ? ExtractUserKey(iter_->key()) : saved_key_; } virtual Slice value() const { assert(valid_); + Slice raw_value = (direction_ == kForward) ? iter_->value() : saved_value_; if (large_ == NULL) { - return value_; + return raw_value; } else { MutexLock l(&large_->mutex); if (!large_->produced) { - ReadIndirectValue(); + ReadIndirectValue(raw_value); } return large_->value; } } - - virtual void Next() { - assert(valid_); - // iter_ is already positioned past DBIter::key() - FindNextUserEntry(); - } - - virtual void Prev() { - assert(valid_); - bool ignored; - ScanUntilBeforeCurrentKey(&ignored); - FindPrevUserEntry(); - } - - virtual void Seek(const Slice& target) { - ParsedInternalKey ikey(target, sequence_, kValueTypeForSeek); - std::string tmp; - AppendInternalKey(&tmp, ikey); - iter_->Seek(tmp); - FindNextUserEntry(); - } - virtual void SeekToFirst() { - iter_->SeekToFirst(); - FindNextUserEntry(); - } - - virtual void SeekToLast(); - virtual Status status() const { if (status_.ok()) { if (large_ != NULL && !large_->status.ok()) return large_->status; @@ -104,23 +88,13 @@ class DBIter: public Iterator { } } - private: - void FindNextUserEntry(); - void FindPrevUserEntry(); - void SaveKey(const Slice& k) { key_.assign(k.data(), k.size()); } - void SaveValue(const Slice& v) { - if (value_.capacity() > v.size() + 1048576) { - std::string empty; - swap(empty, value_); - } - value_.assign(v.data(), v.size()); - } - bool ParseKey(ParsedInternalKey* key); - void SkipPast(const Slice& k); - void ScanUntilBeforeCurrentKey(bool* found_live); - - void ReadIndirectValue() const; + virtual void Next(); + virtual void Prev(); + virtual void Seek(const Slice& target); + virtual void SeekToFirst(); + virtual void SeekToLast(); + private: struct Large { port::Mutex mutex; std::string value; @@ -128,19 +102,42 @@ class DBIter: public Iterator { Status status; }; + void FindNextUserEntry(bool skipping, std::string* skip); + void FindPrevUserEntry(); + bool ParseKey(ParsedInternalKey* key); + void ReadIndirectValue(Slice ref) const; + + inline void SaveKey(const Slice& k, std::string* dst) { + dst->assign(k.data(), k.size()); + } + + inline void ForgetLargeValue() { + if (large_ != NULL) { + delete large_; + large_ = NULL; + } + } + + inline void ClearSavedValue() { + if (saved_value_.capacity() > 1048576) { + std::string empty; + swap(empty, saved_value_); + } else { + saved_value_.clear(); + } + } + const std::string* const dbname_; Env* const env_; - const Comparator* const user_comparator_; - - // iter_ is positioned just past current entry for DBIter if valid_ Iterator* const iter_; - SequenceNumber const sequence_; + Status status_; - std::string key_; // Always a user key - std::string value_; - Large* large_; // Non-NULL if value is an indirect reference + std::string saved_key_; // == current key when direction_==kReverse + std::string saved_value_; // == current raw value when direction_==kReverse + Large* large_; // Non-NULL if value is an indirect reference + Direction direction_; bool valid_; // No copying allowed @@ -157,204 +154,189 @@ inline bool DBIter::ParseKey(ParsedInternalKey* ikey) { } } -void DBIter::FindNextUserEntry() { - if (large_ != NULL) { - if (status_.ok() && !large_->status.ok()) { - status_ = large_->status; - } - delete large_; - large_ = NULL; - } - while (iter_->Valid()) { - ParsedInternalKey ikey; - if (!ParseKey(&ikey)) { - // Skip past corrupted entry - iter_->Next(); - continue; - } - if (ikey.sequence > sequence_) { - // Ignore entries newer than the snapshot +void DBIter::Next() { + assert(valid_); + ForgetLargeValue(); + + if (direction_ == kReverse) { // Switch directions? + direction_ = kForward; + // iter_ is pointing just before the entries for this->key(), + // so advance into the range of entries for this->key() and then + // use the normal skipping code below. + if (!iter_->Valid()) { + iter_->SeekToFirst(); + } else { iter_->Next(); - continue; } - - switch (ikey.type) { - case kTypeDeletion: - SaveKey(ikey.user_key); // Make local copy for use by SkipPast() - iter_->Next(); - SkipPast(key_); - // Do not return deleted entries. Instead keep looping. - break; - - case kTypeValue: - SaveKey(ikey.user_key); - SaveValue(iter_->value()); - iter_->Next(); - SkipPast(key_); - // Yield the value we just found. - valid_ = true; - return; - - case kTypeLargeValueRef: - SaveKey(ikey.user_key); - // Save the large value ref as value_, and read it lazily on a call - // to value() - SaveValue(iter_->value()); - large_ = new Large; - large_->produced = false; - iter_->Next(); - SkipPast(key_); - // Yield the value we just found. - valid_ = true; - return; + if (!iter_->Valid()) { + valid_ = false; + saved_key_.clear(); + return; } } - valid_ = false; - key_.clear(); - value_.clear(); - assert(large_ == NULL); + + // Temporarily use saved_key_ as storage for key to skip. + std::string* skip = &saved_key_; + SaveKey(ExtractUserKey(iter_->key()), skip); + FindNextUserEntry(true, skip); } -void DBIter::SkipPast(const Slice& k) { - while (iter_->Valid()) { +void DBIter::FindNextUserEntry(bool skipping, std::string* skip) { + // Loop until we hit an acceptable entry to yield + assert(iter_->Valid()); + assert(direction_ == kForward); + assert(large_ == NULL); + do { ParsedInternalKey ikey; - // Note that if we cannot parse an internal key, we keep looping - // so that if we have a run like the following: - // => value100 - // - // => value50 - // we will skip over the corrupted entry as well as value50. - if (ParseKey(&ikey) && user_comparator_->Compare(ikey.user_key, k) != 0) { - break; + if (ParseKey(&ikey) && ikey.sequence <= sequence_) { + switch (ikey.type) { + case kTypeDeletion: + // Arrange to skip all upcoming entries for this key since + // they are hidden by this deletion. + SaveKey(ikey.user_key, skip); + skipping = true; + break; + case kTypeValue: + case kTypeLargeValueRef: + if (skipping && + user_comparator_->Compare(ikey.user_key, *skip) <= 0) { + // Entry hidden + } else { + valid_ = true; + saved_key_.clear(); + if (ikey.type == kTypeLargeValueRef) { + large_ = new Large; + large_->produced = false; + } + return; + } + break; + } } iter_->Next(); - } + } while (iter_->Valid()); + saved_key_.clear(); + valid_ = false; } -void DBIter::SeekToLast() { - // Position iter_ at the last uncorrupted user key and then - // let FindPrevUserEntry() do the heavy lifting to find - // a user key that is live. - iter_->SeekToLast(); - ParsedInternalKey current; - while (iter_->Valid() && !ParseKey(¤t)) { - iter_->Prev(); - } - if (iter_->Valid()) { - SaveKey(current.user_key); - } - FindPrevUserEntry(); -} +void DBIter::Prev() { + assert(valid_); + ForgetLargeValue(); -// Let X be the user key at which iter_ is currently positioned. -// Adjust DBIter to point at the last entry with a key <= X that -// has a live value. -void DBIter::FindPrevUserEntry() { - // Consider the following example: - // - // A@540 - // A@400 - // - // B@300 - // B@200 - // B@100 <- iter_ - // - // C@301 - // C@201 - // - // The comments marked "(first iteration)" below relate what happens - // for the preceding example in the first iteration of the while loop - // below. There may be more than one iteration either if there are - // no live values for B, or if there is a corruption. - while (iter_->Valid()) { - std::string saved = key_; - bool found_live; - ScanUntilBeforeCurrentKey(&found_live); - // (first iteration) iter_ at A@400 - if (found_live) { - // Step forward into range of entries with user key >= saved + if (direction_ == kForward) { // Switch directions? + // iter_ is pointing at the current entry. Scan backwards until + // the key changes so we can use the normal reverse scanning code. + assert(iter_->Valid()); // Otherwise valid_ would have been false + SaveKey(ExtractUserKey(iter_->key()), &saved_key_); + while (true) { + iter_->Prev(); if (!iter_->Valid()) { - iter_->SeekToFirst(); - } else { - iter_->Next(); - } - // (first iteration) iter_ at B@300 - - FindNextUserEntry(); // Sets key_ to the key of the next value it found - if (valid_ && user_comparator_->Compare(key_, saved) == 0) { - // (first iteration) iter_ at C@301 + valid_ = false; + saved_key_.clear(); + ClearSavedValue(); return; } - - // FindNextUserEntry() could not find any entries under the - // user key "saved". This is probably a corruption since - // ScanUntilBefore(saved) found a live value. So we skip - // backwards to an earlier key and ignore the corrupted - // entries for "saved". - // - // (first iteration) iter_ at C@301 and saved == "B" - key_ = saved; - bool ignored; - ScanUntilBeforeCurrentKey(&ignored); - // (first iteration) iter_ at A@400 + if (user_comparator_->Compare(ExtractUserKey(iter_->key()), + saved_key_) < 0) { + break; + } } + direction_ = kReverse; } - valid_ = false; - key_.clear(); - value_.clear(); + + FindPrevUserEntry(); } -void DBIter::ScanUntilBeforeCurrentKey(bool* found_live) { - *found_live = false; - if (!iter_->Valid()) { - iter_->SeekToLast(); - } +void DBIter::FindPrevUserEntry() { + assert(direction_ == kReverse); + assert(large_ == NULL); - while (iter_->Valid()) { - ParsedInternalKey current; - if (!ParseKey(¤t)) { + ValueType value_type = kTypeDeletion; + if (iter_->Valid()) { + SaveKey(ExtractUserKey(iter_->key()), &saved_key_); + do { + ParsedInternalKey ikey; + if (ParseKey(&ikey) && ikey.sequence <= sequence_) { + if ((value_type != kTypeDeletion) && + user_comparator_->Compare(ikey.user_key, saved_key_) < 0) { + // We encountered a non-deleted value in entries for previous keys, + break; + } + value_type = ikey.type; + if (value_type == kTypeDeletion) { + ClearSavedValue(); + } else { + Slice raw_value = iter_->value(); + if (saved_value_.capacity() > raw_value.size() + 1048576) { + std::string empty; + swap(empty, saved_value_); + } + saved_value_.assign(raw_value.data(), raw_value.size()); + } + } iter_->Prev(); - continue; - } + } while (iter_->Valid()); + } - if (current.sequence > sequence_) { - // Ignore entries that are serialized after this read - iter_->Prev(); - continue; + if (value_type == kTypeDeletion) { + // End + valid_ = false; + saved_key_.clear(); + ClearSavedValue(); + direction_ = kForward; + } else { + valid_ = true; + if (value_type == kTypeLargeValueRef) { + large_ = new Large; + large_->produced = false; } + } +} - const int cmp = user_comparator_->Compare(current.user_key, key_); - if (cmp < 0) { - SaveKey(current.user_key); - return; - } else if (cmp == 0) { - switch (current.type) { - case kTypeDeletion: - *found_live = false; - break; - - case kTypeValue: - case kTypeLargeValueRef: - *found_live = true; - break; - } - } else { // cmp > 0 - *found_live = false; - } +void DBIter::Seek(const Slice& target) { + direction_ = kForward; + ForgetLargeValue(); + ClearSavedValue(); + saved_key_.clear(); + AppendInternalKey( + &saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek)); + iter_->Seek(saved_key_); + if (iter_->Valid()) { + FindNextUserEntry(false, &saved_key_ /* temporary storage */); + } else { + valid_ = false; + } +} - iter_->Prev(); +void DBIter::SeekToFirst() { + direction_ = kForward; + ForgetLargeValue(); + ClearSavedValue(); + iter_->SeekToFirst(); + if (iter_->Valid()) { + FindNextUserEntry(false, &saved_key_ /* temporary storage */); + } else { + valid_ = false; } } -void DBIter::ReadIndirectValue() const { +void DBIter::SeekToLast() { + direction_ = kReverse; + ForgetLargeValue(); + ClearSavedValue(); + iter_->SeekToLast(); + FindPrevUserEntry(); +} + +void DBIter::ReadIndirectValue(Slice ref) const { assert(!large_->produced); large_->produced = true; LargeValueRef large_ref; - if (value_.size() != LargeValueRef::ByteSize()) { + if (ref.size() != LargeValueRef::ByteSize()) { large_->status = Status::Corruption("malformed large value reference"); return; } - memcpy(large_ref.data, value_.data(), LargeValueRef::ByteSize()); + memcpy(large_ref.data, ref.data(), LargeValueRef::ByteSize()); std::string fname = LargeValueFileName(*dbname_, large_ref); RandomAccessFile* file; Status s = env_->NewRandomAccessFile(fname, &file); diff --git a/db/db_test.cc b/db/db_test.cc index 888c560..0414176 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -209,6 +209,16 @@ class DBTest { } } } + + std::string IterStatus(Iterator* iter) { + std::string result; + if (iter->Valid()) { + result = iter->key().ToString() + "->" + iter->value().ToString(); + } else { + result = "(invalid)"; + } + return result; + } }; TEST(DBTest, Empty) { @@ -234,6 +244,180 @@ TEST(DBTest, PutDeleteGet) { ASSERT_EQ("NOT_FOUND", Get("foo")); } +TEST(DBTest, IterEmpty) { + Iterator* iter = db_->NewIterator(ReadOptions()); + + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek("foo"); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + delete iter; +} + +TEST(DBTest, IterSingle) { + ASSERT_OK(Put("a", "va")); + Iterator* iter = db_->NewIterator(ReadOptions()); + + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek(""); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek("a"); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek("b"); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + delete iter; +} + +TEST(DBTest, IterMulti) { + ASSERT_OK(Put("a", "va")); + ASSERT_OK(Put("b", "vb")); + ASSERT_OK(Put("c", "vc")); + Iterator* iter = db_->NewIterator(ReadOptions()); + + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->Seek(""); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Seek("a"); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Seek("ax"); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Seek("b"); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Seek("z"); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + // Switch from reverse to forward + iter->SeekToLast(); + iter->Prev(); + iter->Prev(); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + + // Switch from forward to reverse + iter->SeekToFirst(); + iter->Next(); + iter->Next(); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + + // Make sure iter stays at snapshot + ASSERT_OK(Put("a", "va2")); + ASSERT_OK(Put("a2", "va3")); + ASSERT_OK(Put("b", "vb2")); + ASSERT_OK(Put("c", "vc2")); + ASSERT_OK(Delete("b")); + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "b->vb"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + delete iter; +} + +TEST(DBTest, IterSmallAndLargeMix) { + ASSERT_OK(Put("a", "va")); + ASSERT_OK(Put("b", std::string(100000, 'b'))); + ASSERT_OK(Put("c", "vc")); + ASSERT_OK(Put("d", std::string(100000, 'd'))); + ASSERT_OK(Put("e", std::string(100000, 'e'))); + + Iterator* iter = db_->NewIterator(ReadOptions()); + + iter->SeekToFirst(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + iter->SeekToLast(); + ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->va"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "(invalid)"); + + delete iter; +} + TEST(DBTest, Recover) { ASSERT_OK(Put("foo", "v1")); ASSERT_OK(Put("baz", "v5")); -- cgit v1.2.1