Skip to content

Commit

Permalink
MB-51373: Inspect and correct Item objects created by KVStore
Browse files Browse the repository at this point in the history
MB-52793 exposed a bug in the handing of deletes which have a body
(for System XATTRS). The root cause of that bug has been addressed
under that bug, however the problem remains that /previous/ versions
of KV-Engine could have written invalid deleted documents to disk,
which could be encountered after an (offline) upgrade.

Create a function that Couch/Magma-KVStore should call when
they have created an Item from the underlying stored data.
The function inspects the Item for datatype anomalies and if
found logs and corrects the Item preventing exceptions
occurring further up the stack.

In this case we check for an Item with no value, but a datatype,
which in the case of datatype=xattr can cause faults in xattr
inspection code.

Also adds a regression test which verifies that the sanitiztion of
such items is correctly triggered when reading documents from disk in
the various ways we access them.

Change-Id: I235af07a1973c4af35301e17223e624a2cb5acf0
Reviewed-on: https://review.couchbase.org/c/kv_engine/+/177217
Reviewed-by: Trond Norbye <[email protected]>
Reviewed-by: Jim Walker <[email protected]>
Well-Formed: Restriction Checker
Tested-by: Build Bot <[email protected]>
  • Loading branch information
daverigby committed Jul 11, 2022
1 parent 8855aeb commit ad47f53
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 9 deletions.
34 changes: 25 additions & 9 deletions engines/ep/src/couch-kvstore/couch-kvstore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,27 @@ static sized_buf to_sized_buf(const DiskDocKey& key) {

/**
* Helper function to create an Item from couchstore DocInfo & related types.
* @param vbid The vbucket the Item belongs to
* @param docinfo The id, db_seq and rev_seq are used in the Item construction
* @param metadata The metadata to use to create the Item
* @param value an optional value, this allows "key-only" paths to be
* distinguishable from a value of 0 length.
*/
static std::unique_ptr<Item> makeItemFromDocInfo(Vbid vbid,
const DocInfo& docinfo,
const MetaData& metadata,
sized_buf value) {
boost::optional<sized_buf> value) {

value_t body;
if (value) {
body.reset(Blob::New(value->buf, value->size));
}
// Strip off the DurabilityPrepare namespace (if present) from the persisted
// dockey before we create the in-memory item.
auto item = std::make_unique<Item>(makeDiskDocKey(docinfo.id).getDocKey(),
metadata.getFlags(),
metadata.getExptime(),
value.buf,
value.size,
body,
metadata.getDataType(),
metadata.getCas(),
docinfo.db_seq,
Expand All @@ -159,6 +168,8 @@ static std::unique_ptr<Item> makeItemFromDocInfo(Vbid vbid,
item->setDeleted(metadata.getDeleteSource());
}

KVStore::checkAndFixKVStoreCreatedItem(*item);

if (metadata.getVersionInitialisedFrom() == MetaData::Version::V3) {
// Metadata is from a SyncWrite - update the Item appropriately.
switch (metadata.getDurabilityOp()) {
Expand Down Expand Up @@ -843,7 +854,7 @@ static int notify_expired_item(DocInfo& info,
sized_buf item,
compaction_ctx& ctx,
time_t currtime) {
sized_buf data{nullptr, 0};
boost::optional<sized_buf> data;
cb::compression::Buffer inflated;

if (mcbp::datatype::is_xattr(metadata.getDataType())) {
Expand All @@ -870,7 +881,7 @@ static int notify_expired_item(DocInfo& info,
// Now remove snappy bit
metadata.setDataType(metadata.getDataType() &
~PROTOCOL_BINARY_DATATYPE_SNAPPY);
data = {inflated.data(), inflated.size()};
data = sized_buf{inflated.data(), inflated.size()};
}
}

Expand Down Expand Up @@ -1952,9 +1963,12 @@ couchstore_error_t CouchKVStore::fetchDoc(Db* db,
return COUCHSTORE_ERROR_DB_NO_LONGER_VALID;
}

if (metaOnly == GetMetaOnly::Yes) {
const bool forceValueFetch = isDocumentPotentiallyCorruptedByMB52793(
docinfo->deleted, *metadata);
if (metaOnly == GetMetaOnly::Yes && !forceValueFetch) {
// Can skip reading document value.
auto it = makeItemFromDocInfo(
vbId, *docinfo, *metadata, {nullptr, docinfo->size});
vbId, *docinfo, *metadata, boost::none);

docValue = GetValue(std::move(it));
// update ep-engine IO stats
Expand Down Expand Up @@ -2015,7 +2029,6 @@ int CouchKVStore::recordDbDump(Db *db, DocInfo *docinfo, void *ctx) {
auto* cl = sctx->lookup.get();

Doc *doc = nullptr;
sized_buf value{nullptr, 0};
uint64_t byseqno = docinfo->db_seq;
Vbid vbucketId = sctx->vbid;

Expand Down Expand Up @@ -2053,7 +2066,10 @@ int CouchKVStore::recordDbDump(Db *db, DocInfo *docinfo, void *ctx) {

auto metadata = MetaDataFactory::createMetaData(docinfo->rev_meta);

if (sctx->valFilter != ValueFilter::KEYS_ONLY) {
boost::optional<sized_buf> value;
const bool forceValueFetch = isDocumentPotentiallyCorruptedByMB52793(
docinfo->deleted, *metadata);
if (sctx->valFilter != ValueFilter::KEYS_ONLY || forceValueFetch) {
couchstore_open_options openOptions = 0;

/**
Expand Down
43 changes: 43 additions & 0 deletions engines/ep/src/kvstore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <platform/dirutils.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <utilities/logtags.h>

ScanContext::ScanContext(
std::shared_ptr<StatusCallback<GetValue>> cb,
Expand Down Expand Up @@ -713,3 +714,45 @@ IORequest::~IORequest() = default;
bool IORequest::isDelete() const {
return item->isDeleted() && !item->isPending();
}

bool KVStore::isDocumentPotentiallyCorruptedByMB52793(
bool deleted, const MetaData& metadata) {
// As per MB-52793, Deleted documents with a zero length value can
// incorrectly end up with the datatype set to XATTR (when it should be
// RAW_BYTES), if it was previously a deleted document /with/ a value of
// system XATTRs.
// To be able to fixup such documents we need to know more than just the
// metadata, as the value size is stored as part of the value. As such;
// return true if it meets all the criteria which metadata informs us of.
return deleted && metadata.getDataType() == PROTOCOL_BINARY_DATATYPE_XATTR;
}

// MB-51373: Fix the datatype of invalid documents. Currently checks for
// datatype ! raw but no value, this invalid document has been seen in
// production deployments and reading them can lead to a restart
bool KVStore::checkAndFixKVStoreCreatedItem(Item& item) {
#if CB_DEVELOPMENT_ASSERTS
if (item.isDeleted() &&
item.getDataType() == PROTOCOL_BINARY_DATATYPE_XATTR) {
// If we encounter a potential invalid doc (MB-51373) - Delete with
// datatype XATTR, we should have its value to be able to verify it
// is correct, or otherwise sanitise it.
Expects(item.getValue());
}
#endif
if (item.isDeleted() &&
item.getValue() &&
item.getNBytes() == 0 &&
item.getDataType() != PROTOCOL_BINARY_RAW_BYTES) {
std::stringstream ss;
ss << item;
EP_LOG_WARN(
"KVStore::checkAndFixKVStoreCreatedItem: {} correcting invalid "
"datatype {}",
item.getVBucketId(),
cb::UserDataView(ss.str()).getSanitizedValue());
item.setDataType(PROTOCOL_BINARY_RAW_BYTES);
return true;
}
return false;
}
23 changes: 23 additions & 0 deletions engines/ep/src/kvstore.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class DiskDocKey;
class Item;
class KVStore;
class KVStoreConfig;
class MetaData;
class PersistenceCallback;
class RollbackCB;
class RollbackResult;
Expand Down Expand Up @@ -926,6 +927,28 @@ class KVStore {
postFlushHook = hook;
}

/**
* Check if the specified document metadata is /potentially/ affected
* by a datatype corruption issue (MB-52793) - a deleted document with
* zero length value has an incorrect datatype.
*
* @return True if the document is /potentially/ affected and hence further
* analysis is needed (such as fetching the document body for
* additional checks).
*/
static bool isDocumentPotentiallyCorruptedByMB52793(
bool deleted, const MetaData& metadata);

/**
* Function inspects the Item for some known issues that may exist in
* persisted data (possibly from older releases and now present due to
* upgrade). If an inconsistency is found it will log a fix the Item.
*
* @param item [in/out] the Item to check and if needed, fix.
* @return true if the Item was changed by the function because of an issue
*/
static bool checkAndFixKVStoreCreatedItem(Item& item);

protected:
/**
* Prepare for delete of the vbucket file - Implementation specific method
Expand Down
4 changes: 4 additions & 0 deletions engines/ep/src/magma-kvstore/magma-kvstore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1267,6 +1267,10 @@ std::unique_ptr<Item> MagmaKVStore::makeItem(Vbid vb,
vb,
meta.revSeqno);

if (filter != ValueFilter::KEYS_ONLY) {
checkAndFixKVStoreCreatedItem(*item);
}

if (meta.deleted) {
item->setDeleted(static_cast<DeleteSource>(meta.deleteSource));
}
Expand Down
97 changes: 97 additions & 0 deletions engines/ep/tests/module_tests/evp_store_single_threaded_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5996,3 +5996,100 @@ TEST_P(STParamPersistentBucketTest,
EXPECT_EQ(PROTOCOL_BINARY_RAW_BYTES, v->getDatatype());
}
}

TEST_P(STParamPersistentBucketTest,
SanitizeOnDiskDeletedDocWithIncorrectXATTRFull) {
testSanitizeOnDiskDeletedDocWithIncorrectXATTR(false);
}

TEST_P(STParamPersistentBucketTest,
SanitizeOnDiskDeletedDocWithIncorrectXATTRMetaOnly) {
testSanitizeOnDiskDeletedDocWithIncorrectXATTR(true);
}

void STParamPersistentBucketTest::
testSanitizeOnDiskDeletedDocWithIncorrectXATTR(bool fetchMetaOnly) {
setVBucketStateAndRunPersistTask(vbid, vbucket_state_active);

const auto key = makeStoredDocKey("key");

// Store an initial item for us to rollback to (see end of test).
const auto initalItem = store_item(vbid, key, "value");
flushVBucketToDiskIfPersistent(vbid, 1);

// Construct a document on-disk which is Deleted, zero value,
// with datatype=XATTR.
setVBucketStateAndRunPersistTask(vbid, vbucket_state_replica);
auto item = make_item(vbid, key, {}, 0, PROTOCOL_BINARY_DATATYPE_XATTR);
item.setCas(1234);
item.setDeleted(DeleteSource::Explicit);

uint64_t seqno;
ASSERT_EQ(ENGINE_SUCCESS,
store->setWithMeta(item,
0 /* cas */,
&seqno,
cookie,
{vbucket_state_replica},
CheckConflicts::No,
/*allowExisting*/ true));
flushVBucketToDiskIfPersistent(vbid, 1);

// Re-fetch from disk to confirm it is correctly sanitized.
// Need to be active to be able to fetch from store APIs.
setVBucketStateAndRunPersistTask(vbid, vbucket_state_active);

auto fetchDocAndCheck = [&]() {
if (fetchMetaOnly) {
ItemMetaData metadata;
uint32_t deleted;
uint8_t datatype;
ASSERT_EQ(ENGINE_EWOULDBLOCK,
store->getMetaData(key,
vbid,
cookie,
metadata,
deleted,
datatype));
runBGFetcherTask();
ASSERT_EQ(ENGINE_SUCCESS,
store->getMetaData(key,
vbid,
cookie,
metadata,
deleted,
datatype));
EXPECT_TRUE(deleted);
EXPECT_EQ(PROTOCOL_BINARY_RAW_BYTES, datatype);
} else {
// Fetch entire document and check sanitized.
get_options_t options = static_cast<get_options_t>(
QUEUE_BG_FETCH | HONOR_STATES | TRACK_REFERENCE |
DELETE_TEMP | HIDE_LOCKED_CAS | TRACK_STATISTICS |
GET_DELETED_VALUE);
auto gv = store->get(key, vbid, cookie, options);
ASSERT_EQ(ENGINE_EWOULDBLOCK, gv.getStatus());

runBGFetcherTask();
gv = store->get(key, vbid, cookie, GET_DELETED_VALUE);
EXPECT_EQ(ENGINE_SUCCESS, gv.getStatus());
EXPECT_EQ(PROTOCOL_BINARY_RAW_BYTES,
gv.item->getDataType());
}
};
fetchDocAndCheck();

// Restart and warmup, checking that the invalid document does not cause
// issues at warmup.
resetEngineAndWarmup();
fetchDocAndCheck();

// Finally trigger a rollback of the problematic document, and
// confirm the rollback is successful (it must perform a KVStore scan
// which covers the affected document.
setVBucketStateAndRunPersistTask(vbid, vbucket_state_replica);
ASSERT_EQ(TaskStatus::Complete,
store->rollback(vbid, initalItem.getBySeqno()));


}
10 changes: 10 additions & 0 deletions engines/ep/tests/module_tests/evp_store_single_threaded_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,4 +399,14 @@ class STParamPersistentBucketTest : public STParameterizedBucketTest {
void testCompactionPersistedDeletes(bool dropDeletes);

void testFailoverTableEntryPersistedAtWarmup(std::function<void()>);

/**
* Test for MB-51373 - if we end up with a deleted document on-disk with
* an empty value but datatype=XATTR (when it should be RAW_BYTES - bug
* MB-52793), then we should sanitize that value when it is loaded from
* disk.
* @param fetchMetaOnly If true then when fetching corrupted doc from disk,
* only fetch metadata, else fetch entire document.
*/
void testSanitizeOnDiskDeletedDocWithIncorrectXATTR(bool fetchMetaOnly);
};

0 comments on commit ad47f53

Please sign in to comment.