diff --git a/engines/ep/src/couch-kvstore/couch-kvstore.cc b/engines/ep/src/couch-kvstore/couch-kvstore.cc index db678cdff4..bea130b7c1 100644 --- a/engines/ep/src/couch-kvstore/couch-kvstore.cc +++ b/engines/ep/src/couch-kvstore/couch-kvstore.cc @@ -138,18 +138,27 @@ static sized_buf to_sized_buf(const DiskDocKey& key) { /** * Helper function to create an Item from couchstore DocInfo & related types. + * @param vbid The vbucket the Item belongs to + * @param docinfo The id, db_seq and rev_seq are used in the Item construction + * @param metadata The metadata to use to create the Item + * @param value an optional value, this allows "key-only" paths to be + * distinguishable from a value of 0 length. */ static std::unique_ptr makeItemFromDocInfo(Vbid vbid, const DocInfo& docinfo, const MetaData& metadata, - sized_buf value) { + boost::optional value) { + + value_t body; + if (value) { + body.reset(Blob::New(value->buf, value->size)); + } // Strip off the DurabilityPrepare namespace (if present) from the persisted // dockey before we create the in-memory item. auto item = std::make_unique(makeDiskDocKey(docinfo.id).getDocKey(), metadata.getFlags(), metadata.getExptime(), - value.buf, - value.size, + body, metadata.getDataType(), metadata.getCas(), docinfo.db_seq, @@ -159,6 +168,8 @@ static std::unique_ptr makeItemFromDocInfo(Vbid vbid, item->setDeleted(metadata.getDeleteSource()); } + KVStore::checkAndFixKVStoreCreatedItem(*item); + if (metadata.getVersionInitialisedFrom() == MetaData::Version::V3) { // Metadata is from a SyncWrite - update the Item appropriately. switch (metadata.getDurabilityOp()) { @@ -843,7 +854,7 @@ static int notify_expired_item(DocInfo& info, sized_buf item, compaction_ctx& ctx, time_t currtime) { - sized_buf data{nullptr, 0}; + boost::optional data; cb::compression::Buffer inflated; if (mcbp::datatype::is_xattr(metadata.getDataType())) { @@ -870,7 +881,7 @@ static int notify_expired_item(DocInfo& info, // Now remove snappy bit metadata.setDataType(metadata.getDataType() & ~PROTOCOL_BINARY_DATATYPE_SNAPPY); - data = {inflated.data(), inflated.size()}; + data = sized_buf{inflated.data(), inflated.size()}; } } @@ -1952,9 +1963,12 @@ couchstore_error_t CouchKVStore::fetchDoc(Db* db, return COUCHSTORE_ERROR_DB_NO_LONGER_VALID; } - if (metaOnly == GetMetaOnly::Yes) { + const bool forceValueFetch = isDocumentPotentiallyCorruptedByMB52793( + docinfo->deleted, *metadata); + if (metaOnly == GetMetaOnly::Yes && !forceValueFetch) { + // Can skip reading document value. auto it = makeItemFromDocInfo( - vbId, *docinfo, *metadata, {nullptr, docinfo->size}); + vbId, *docinfo, *metadata, boost::none); docValue = GetValue(std::move(it)); // update ep-engine IO stats @@ -2015,7 +2029,6 @@ int CouchKVStore::recordDbDump(Db *db, DocInfo *docinfo, void *ctx) { auto* cl = sctx->lookup.get(); Doc *doc = nullptr; - sized_buf value{nullptr, 0}; uint64_t byseqno = docinfo->db_seq; Vbid vbucketId = sctx->vbid; @@ -2053,7 +2066,10 @@ int CouchKVStore::recordDbDump(Db *db, DocInfo *docinfo, void *ctx) { auto metadata = MetaDataFactory::createMetaData(docinfo->rev_meta); - if (sctx->valFilter != ValueFilter::KEYS_ONLY) { + boost::optional value; + const bool forceValueFetch = isDocumentPotentiallyCorruptedByMB52793( + docinfo->deleted, *metadata); + if (sctx->valFilter != ValueFilter::KEYS_ONLY || forceValueFetch) { couchstore_open_options openOptions = 0; /** diff --git a/engines/ep/src/kvstore.cc b/engines/ep/src/kvstore.cc index eb9ff3cf71..89c4fed233 100644 --- a/engines/ep/src/kvstore.cc +++ b/engines/ep/src/kvstore.cc @@ -43,6 +43,7 @@ #include #include #include +#include ScanContext::ScanContext( std::shared_ptr> cb, @@ -713,3 +714,45 @@ IORequest::~IORequest() = default; bool IORequest::isDelete() const { return item->isDeleted() && !item->isPending(); } + +bool KVStore::isDocumentPotentiallyCorruptedByMB52793( + bool deleted, const MetaData& metadata) { + // As per MB-52793, Deleted documents with a zero length value can + // incorrectly end up with the datatype set to XATTR (when it should be + // RAW_BYTES), if it was previously a deleted document /with/ a value of + // system XATTRs. + // To be able to fixup such documents we need to know more than just the + // metadata, as the value size is stored as part of the value. As such; + // return true if it meets all the criteria which metadata informs us of. + return deleted && metadata.getDataType() == PROTOCOL_BINARY_DATATYPE_XATTR; +} + +// MB-51373: Fix the datatype of invalid documents. Currently checks for +// datatype ! raw but no value, this invalid document has been seen in +// production deployments and reading them can lead to a restart +bool KVStore::checkAndFixKVStoreCreatedItem(Item& item) { +#if CB_DEVELOPMENT_ASSERTS + if (item.isDeleted() && + item.getDataType() == PROTOCOL_BINARY_DATATYPE_XATTR) { + // If we encounter a potential invalid doc (MB-51373) - Delete with + // datatype XATTR, we should have its value to be able to verify it + // is correct, or otherwise sanitise it. + Expects(item.getValue()); + } +#endif + if (item.isDeleted() && + item.getValue() && + item.getNBytes() == 0 && + item.getDataType() != PROTOCOL_BINARY_RAW_BYTES) { + std::stringstream ss; + ss << item; + EP_LOG_WARN( + "KVStore::checkAndFixKVStoreCreatedItem: {} correcting invalid " + "datatype {}", + item.getVBucketId(), + cb::UserDataView(ss.str()).getSanitizedValue()); + item.setDataType(PROTOCOL_BINARY_RAW_BYTES); + return true; + } + return false; +} diff --git a/engines/ep/src/kvstore.h b/engines/ep/src/kvstore.h index 815874c78b..a768ded432 100644 --- a/engines/ep/src/kvstore.h +++ b/engines/ep/src/kvstore.h @@ -41,6 +41,7 @@ class DiskDocKey; class Item; class KVStore; class KVStoreConfig; +class MetaData; class PersistenceCallback; class RollbackCB; class RollbackResult; @@ -926,6 +927,28 @@ class KVStore { postFlushHook = hook; } + /** + * Check if the specified document metadata is /potentially/ affected + * by a datatype corruption issue (MB-52793) - a deleted document with + * zero length value has an incorrect datatype. + * + * @return True if the document is /potentially/ affected and hence further + * analysis is needed (such as fetching the document body for + * additional checks). + */ + static bool isDocumentPotentiallyCorruptedByMB52793( + bool deleted, const MetaData& metadata); + + /** + * Function inspects the Item for some known issues that may exist in + * persisted data (possibly from older releases and now present due to + * upgrade). If an inconsistency is found it will log a fix the Item. + * + * @param item [in/out] the Item to check and if needed, fix. + * @return true if the Item was changed by the function because of an issue + */ + static bool checkAndFixKVStoreCreatedItem(Item& item); + protected: /** * Prepare for delete of the vbucket file - Implementation specific method diff --git a/engines/ep/src/magma-kvstore/magma-kvstore.cc b/engines/ep/src/magma-kvstore/magma-kvstore.cc index a24ca3dccf..0cae51aaa9 100644 --- a/engines/ep/src/magma-kvstore/magma-kvstore.cc +++ b/engines/ep/src/magma-kvstore/magma-kvstore.cc @@ -1267,6 +1267,10 @@ std::unique_ptr MagmaKVStore::makeItem(Vbid vb, vb, meta.revSeqno); + if (filter != ValueFilter::KEYS_ONLY) { + checkAndFixKVStoreCreatedItem(*item); + } + if (meta.deleted) { item->setDeleted(static_cast(meta.deleteSource)); } diff --git a/engines/ep/tests/module_tests/evp_store_single_threaded_test.cc b/engines/ep/tests/module_tests/evp_store_single_threaded_test.cc index b2710d3c5c..3560d8b22c 100644 --- a/engines/ep/tests/module_tests/evp_store_single_threaded_test.cc +++ b/engines/ep/tests/module_tests/evp_store_single_threaded_test.cc @@ -5996,3 +5996,100 @@ TEST_P(STParamPersistentBucketTest, EXPECT_EQ(PROTOCOL_BINARY_RAW_BYTES, v->getDatatype()); } } + +TEST_P(STParamPersistentBucketTest, + SanitizeOnDiskDeletedDocWithIncorrectXATTRFull) { + testSanitizeOnDiskDeletedDocWithIncorrectXATTR(false); +} + +TEST_P(STParamPersistentBucketTest, + SanitizeOnDiskDeletedDocWithIncorrectXATTRMetaOnly) { + testSanitizeOnDiskDeletedDocWithIncorrectXATTR(true); +} + +void STParamPersistentBucketTest:: + testSanitizeOnDiskDeletedDocWithIncorrectXATTR(bool fetchMetaOnly) { + setVBucketStateAndRunPersistTask(vbid, vbucket_state_active); + + const auto key = makeStoredDocKey("key"); + + // Store an initial item for us to rollback to (see end of test). + const auto initalItem = store_item(vbid, key, "value"); + flushVBucketToDiskIfPersistent(vbid, 1); + + // Construct a document on-disk which is Deleted, zero value, + // with datatype=XATTR. + setVBucketStateAndRunPersistTask(vbid, vbucket_state_replica); + auto item = make_item(vbid, key, {}, 0, PROTOCOL_BINARY_DATATYPE_XATTR); + item.setCas(1234); + item.setDeleted(DeleteSource::Explicit); + + uint64_t seqno; + ASSERT_EQ(ENGINE_SUCCESS, + store->setWithMeta(item, + 0 /* cas */, + &seqno, + cookie, + {vbucket_state_replica}, + CheckConflicts::No, + /*allowExisting*/ true)); + flushVBucketToDiskIfPersistent(vbid, 1); + + // Re-fetch from disk to confirm it is correctly sanitized. + // Need to be active to be able to fetch from store APIs. + setVBucketStateAndRunPersistTask(vbid, vbucket_state_active); + + auto fetchDocAndCheck = [&]() { + if (fetchMetaOnly) { + ItemMetaData metadata; + uint32_t deleted; + uint8_t datatype; + ASSERT_EQ(ENGINE_EWOULDBLOCK, + store->getMetaData(key, + vbid, + cookie, + metadata, + deleted, + datatype)); + runBGFetcherTask(); + ASSERT_EQ(ENGINE_SUCCESS, + store->getMetaData(key, + vbid, + cookie, + metadata, + deleted, + datatype)); + EXPECT_TRUE(deleted); + EXPECT_EQ(PROTOCOL_BINARY_RAW_BYTES, datatype); + } else { + // Fetch entire document and check sanitized. + get_options_t options = static_cast( + QUEUE_BG_FETCH | HONOR_STATES | TRACK_REFERENCE | + DELETE_TEMP | HIDE_LOCKED_CAS | TRACK_STATISTICS | + GET_DELETED_VALUE); + auto gv = store->get(key, vbid, cookie, options); + ASSERT_EQ(ENGINE_EWOULDBLOCK, gv.getStatus()); + + runBGFetcherTask(); + gv = store->get(key, vbid, cookie, GET_DELETED_VALUE); + EXPECT_EQ(ENGINE_SUCCESS, gv.getStatus()); + EXPECT_EQ(PROTOCOL_BINARY_RAW_BYTES, + gv.item->getDataType()); + } + }; + fetchDocAndCheck(); + + // Restart and warmup, checking that the invalid document does not cause + // issues at warmup. + resetEngineAndWarmup(); + fetchDocAndCheck(); + + // Finally trigger a rollback of the problematic document, and + // confirm the rollback is successful (it must perform a KVStore scan + // which covers the affected document. + setVBucketStateAndRunPersistTask(vbid, vbucket_state_replica); + ASSERT_EQ(TaskStatus::Complete, + store->rollback(vbid, initalItem.getBySeqno())); + + +} diff --git a/engines/ep/tests/module_tests/evp_store_single_threaded_test.h b/engines/ep/tests/module_tests/evp_store_single_threaded_test.h index b18f553dae..f103a3e61a 100644 --- a/engines/ep/tests/module_tests/evp_store_single_threaded_test.h +++ b/engines/ep/tests/module_tests/evp_store_single_threaded_test.h @@ -399,4 +399,14 @@ class STParamPersistentBucketTest : public STParameterizedBucketTest { void testCompactionPersistedDeletes(bool dropDeletes); void testFailoverTableEntryPersistedAtWarmup(std::function); + + /** + * Test for MB-51373 - if we end up with a deleted document on-disk with + * an empty value but datatype=XATTR (when it should be RAW_BYTES - bug + * MB-52793), then we should sanitize that value when it is loaded from + * disk. + * @param fetchMetaOnly If true then when fetching corrupted doc from disk, + * only fetch metadata, else fetch entire document. + */ + void testSanitizeOnDiskDeletedDocWithIncorrectXATTR(bool fetchMetaOnly); };