diff --git a/db/blip_sync_context.go b/db/blip_sync_context.go index c3f0f59d59..1346c0198c 100644 --- a/db/blip_sync_context.go +++ b/db/blip_sync_context.go @@ -384,7 +384,7 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response sentSeqs = append(sentSeqs, seq) } } else { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s / %s (seq:%v)", base.UD(docID), revID, seq) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s/%s (seq:%v)", base.UD(docID), revID, seq) if collectionCtx.sgr2PushAlreadyKnownSeqsCallback != nil { alreadyKnownSeqs = append(alreadyKnownSeqs, seq) } diff --git a/db/crud.go b/db/crud.go index fc5d147da4..0b656ae80d 100644 --- a/db/crud.go +++ b/db/crud.go @@ -11,6 +11,7 @@ package db import ( "bytes" "context" + "errors" "fmt" "math" "net/http" @@ -21,7 +22,7 @@ import ( "github.com/couchbase/sync_gateway/auth" "github.com/couchbase/sync_gateway/base" "github.com/couchbase/sync_gateway/channels" - "github.com/pkg/errors" + pkgerrors "github.com/pkg/errors" ) const ( @@ -529,21 +530,23 @@ func (db *DatabaseCollectionWithUser) Get1xRevAndChannels(ctx context.Context, d } // Returns an HTTP 403 error if the User is not allowed to access any of this revision's channels. -func (col *DatabaseCollectionWithUser) authorizeDoc(doc *Document, revid string) error { +func (col *DatabaseCollectionWithUser) authorizeDoc(ctx context.Context, doc *Document, revid string) error { user := col.user if doc == nil || user == nil { return nil // A nil User means access control is disabled } - if revid == "" { - revid = doc.CurrentRev - } - if rev := doc.History[revid]; rev != nil { - // Authenticate against specific revision: - return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, rev.Channels) - } else { - // No such revision; let the caller proceed and return a 404 + + channelsForRev, ok := doc.channelsForRev(revid) + if !ok { + // No such revision + // let the caller proceed and return a 404 return nil + } else if channelsForRev == nil { + // non-leaf (no channel info) - force 404 (caller would find the rev if it tried to look) + return ErrMissing } + + return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, channelsForRev) } // Gets a revision of a document. If it's obsolete it will be loaded from the database if possible. @@ -681,14 +684,14 @@ func (db *DatabaseCollectionWithUser) getAncestorJSON(ctx context.Context, doc * // instead returns a minimal deletion or removal revision to let them know it's gone. func (db *DatabaseCollectionWithUser) get1xRevFromDoc(ctx context.Context, doc *Document, revid string, listRevisions bool) (bodyBytes []byte, removed bool, err error) { var attachments AttachmentsMeta - if err := db.authorizeDoc(doc, revid); err != nil { + if err := db.authorizeDoc(ctx, doc, revid); err != nil { // As a special case, you don't need channel access to see a deletion revision, // otherwise the client's replicator can't process the deletion (since deletions // usually aren't on any channels at all!) But don't show the full body. (See #59) // Update: this applies to non-deletions too, since the client may have lost access to // the channel and gotten a "removed" entry in the _changes feed. It then needs to // incorporate that tombstone and for that it needs to see the _revisions property. - if revid == "" || doc.History[revid] == nil { + if revid == "" || doc.History[revid] == nil || errors.Is(err, ErrMissing) { return nil, false, err } if doc.History[revid].Deleted { @@ -1557,7 +1560,7 @@ func (db *DatabaseCollectionWithUser) addAttachments(ctx context.Context, newAtt if errors.Is(err, ErrAttachmentTooLarge) || err.Error() == "document value was too large" { err = base.HTTPErrorf(http.StatusRequestEntityTooLarge, "Attachment too large") } else { - err = errors.Wrap(err, "Error adding attachment") + err = pkgerrors.Wrap(err, "Error adding attachment") } } return err @@ -1754,7 +1757,8 @@ func (col *DatabaseCollectionWithUser) documentUpdateFunc(ctx context.Context, d return } - if len(channelSet) > 0 { + isWinningRev := doc.CurrentRev == newRevID + if len(channelSet) > 0 && !isWinningRev { doc.History[newRevID].Channels = channelSet } @@ -1781,7 +1785,7 @@ func (col *DatabaseCollectionWithUser) documentUpdateFunc(ctx context.Context, d return } } - _, err = doc.updateChannels(ctx, channelSet) + _, err = doc.updateChannels(ctx, isWinningRev, channelSet) if err != nil { return } @@ -2010,7 +2014,11 @@ func (db *DatabaseCollectionWithUser) updateAndReturnDoc(ctx context.Context, do return nil, "", err } - revChannels := doc.History[newRevID].Channels + revChannels, ok := doc.channelsForRev(newRevID) + if !ok { + // Should be unreachable, as we've already checked History[newRevID] above ... + return nil, "", base.RedactErrorf("unable to determine channels for %s/%s", base.UD(docid), newRevID) + } documentRevision := DocumentRevision{ DocID: docid, RevID: newRevID, diff --git a/db/database.go b/db/database.go index 8508e4bbf3..a683f49f3b 100644 --- a/db/database.go +++ b/db/database.go @@ -1734,9 +1734,11 @@ func (db *DatabaseCollectionWithUser) getResyncedDocument(ctx context.Context, d access = nil channels = nil } - rev.Channels = channels - if rev.ID == doc.CurrentRev { + isWinningRev := rev.ID == doc.CurrentRev + if !isWinningRev { + rev.Channels = channels + } else { if regenerateSequences { updatedUnusedSequences, err = db.assignSequence(ctx, 0, doc, unusedSequences) if err != nil { @@ -1745,7 +1747,7 @@ func (db *DatabaseCollectionWithUser) getResyncedDocument(ctx context.Context, d forceUpdate = true } - changedChannels, err := doc.updateChannels(ctx, channels) + changedChannels, err := doc.updateChannels(ctx, isWinningRev, channels) changed = len(doc.Access.updateAccess(ctx, doc, access)) + len(doc.RoleAccess.updateAccess(ctx, doc, roles)) + len(changedChannels) diff --git a/db/database_test.go b/db/database_test.go index 97c9330678..8d17557caf 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -457,6 +457,70 @@ func TestIsServerless(t *testing.T) { } } +func TestUncachedOldRevisionChannel(t *testing.T) { + db, ctx := setupTestDB(t) + defer db.Close(ctx) + collection := GetSingleDatabaseCollectionWithUser(t, db) + collection.ChannelMapper = channels.NewChannelMapper(ctx, channels.DocChannelsSyncFunction, db.Options.JavascriptTimeout) + + auth := db.Authenticator(base.TestCtx(t)) + + userAlice, err := auth.NewUser("alice", "pass", base.SetOf("ABC")) + require.NoError(t, err, "Error creating user") + + collection.user = userAlice + + // Create the first revision of doc1. + rev1Body := Body{ + "k1": "v1", + "channels": []string{"ABC"}, + } + rev1ID, _, err := collection.Put(ctx, "doc1", rev1Body) + require.NoError(t, err, "Error creating doc") + + rev2Body := Body{ + "k2": "v2", + "channels": []string{"ABC"}, + BodyRev: rev1ID, + } + rev2ID, _, err := collection.Put(ctx, "doc1", rev2Body) + require.NoError(t, err, "Error creating doc") + + rev3Body := Body{ + "k3": "v3", + "channels": []string{"ABC"}, + BodyRev: rev2ID, + } + rev3ID, _, err := collection.Put(ctx, "doc1", rev3Body) + require.NoError(t, err, "Error creating doc") + require.NotEmpty(t, rev3ID, "Error creating doc") + + body, err := collection.Get1xRevBody(ctx, "doc1", rev2ID, true, nil) + require.NoError(t, err, "Error getting 1x rev body") + + // old rev was cached so still retains channel information + _, rev1Digest := ParseRevID(ctx, rev1ID) + _, rev2Digest := ParseRevID(ctx, rev2ID) + bodyExpected := Body{ + "k2": "v2", + "channels": []string{"ABC"}, + BodyRevisions: Revisions{ + RevisionsStart: 2, + RevisionsIds: []string{rev2Digest, rev1Digest}, + }, + BodyId: "doc1", + BodyRev: rev2ID, + } + require.Equal(t, bodyExpected, body) + + // Flush the revision cache to force load from backup revision + collection.FlushRevisionCacheForTest() + + // 404 because we lost the non-leaf channel information after cache flush + _, _, _, _, _, _, _, _, err = collection.Get1xRevAndChannels(ctx, "doc1", rev2ID, false) + assertHTTPError(t, err, 404) +} + // Test removal handling for unavailable multi-channel revisions. func TestGetRemovalMultiChannel(t *testing.T) { db, ctx := setupTestDB(t) @@ -691,7 +755,7 @@ func TestDeltaSyncWhenToRevIsChannelRemoval(t *testing.T) { require.NoError(t, db.DbStats.InitDeltaSyncStats()) delta, redactedRev, err = collection.GetDelta(ctx, "doc1", rev1ID, rev2ID) - require.Equal(t, base.HTTPErrorf(404, "missing"), err) + assert.Equal(t, base.HTTPErrorf(404, "missing"), err) assert.Nil(t, delta) assert.Nil(t, redactedRev) } @@ -1464,7 +1528,10 @@ func TestSyncFnOnPush(t *testing.T) { "public": &channels.ChannelRemoval{Seq: 2, RevID: "4-four"}, }, doc.Channels) - assert.Equal(t, base.SetOf("clibup"), doc.History["4-four"].Channels) + // We no longer store channels for the winning revision in the RevTree, + // so don't expect it to be in doc.History like it used to be... + // The above assertion ensured the doc was *actually* in the correct channel. + assert.Nil(t, doc.History["4-four"].Channels) } func TestInvalidChannel(t *testing.T) { diff --git a/db/document.go b/db/document.go index f8b20913db..b67de0336a 100644 --- a/db/document.go +++ b/db/document.go @@ -95,6 +95,19 @@ type SyncData struct { addedRevisionBodies []string // revIDs of non-winning revision bodies that have been added (and so require persistence) removedRevisionBodyKeys map[string]string // keys of non-winning revisions that have been removed (and so may require deletion), indexed by revID + + currentRevChannels base.Set // A base.Set of the current revision's channels (determined by SyncData.Channels at UnmarshalJSON time) +} + +// determine set of current channels based on removal entries. +func (sd *SyncData) getCurrentChannels() base.Set { + ch := base.SetOf() + for channelName, channelRemoval := range sd.Channels { + if channelRemoval == nil || channelRemoval.Seq == 0 { + ch.Add(channelName) + } + } + return ch } func (sd *SyncData) HashRedact(salt string) SyncData { @@ -181,6 +194,8 @@ type Document struct { RevID string DocAttachments AttachmentsMeta inlineSyncData bool + + currentRevChannels base.Set // A base.Set of the current revision's channels (determined by SyncData.Channels at UnmarshalJSON time) } type historyOnlySyncData struct { @@ -940,7 +955,7 @@ func (doc *Document) addToChannelSetHistory(channelName string, historyEntry Cha // Updates the Channels property of a document object with current & past channels. // Returns the set of channels that have changed (document joined or left in this revision) -func (doc *Document) updateChannels(ctx context.Context, newChannels base.Set) (changedChannels base.Set, err error) { +func (doc *Document) updateChannels(ctx context.Context, isWinningRev bool, newChannels base.Set) (changedChannels base.Set, err error) { var changed []string oldChannels := doc.Channels if oldChannels == nil { @@ -969,6 +984,9 @@ func (doc *Document) updateChannels(ctx context.Context, newChannels base.Set) ( doc.updateChannelHistory(channel, doc.Sequence, true) } } + if isWinningRev { + doc.SyncData.currentRevChannels = newChannels + } if changed != nil { base.InfofCtx(ctx, base.KeyCRUD, "\tDoc %q / %q in channels %q", base.UD(doc.ID), doc.CurrentRev, base.UD(newChannels)) changedChannels, err = channels.SetFromArray(changed, channels.KeepStar) @@ -981,6 +999,7 @@ func (doc *Document) updateChannels(ctx context.Context, newChannels base.Set) ( // Set of channels returned from IsChannelRemoval are "Active" channels and NOT "Removed". func (doc *Document) IsChannelRemoval(ctx context.Context, revID string) (bodyBytes []byte, history Revisions, channels base.Set, isRemoval bool, isDelete bool, err error) { + activeChannels := make(base.Set) removedChannels := make(base.Set) // Iterate over the document's channel history, looking for channels that were removed at revID. If found, also identify whether the removal was a tombstone. @@ -990,25 +1009,16 @@ func (doc *Document) IsChannelRemoval(ctx context.Context, revID string) (bodyBy if removal.Deleted == true { isDelete = true } + } else { + activeChannels[channel] = struct{}{} } } + // If no matches found, return isRemoval=false if len(removedChannels) == 0 { return nil, nil, nil, false, false, nil } - // Construct removal body - // doc ID and rev ID aren't required to be inserted here, as both of those are available in the request. - bodyBytes = []byte(RemovedRedactedDocument) - - activeChannels := make(base.Set) - // Add active channels to the channel set if the the revision is available in the revision tree. - if revInfo, ok := doc.History[revID]; ok { - for channel, _ := range revInfo.Channels { - activeChannels[channel] = struct{}{} - } - } - // Build revision history for revID revHistory, err := doc.History.getHistory(revID) if err != nil { @@ -1021,6 +1031,10 @@ func (doc *Document) IsChannelRemoval(ctx context.Context, revID string) (bodyBy } history = encodeRevisions(ctx, doc.ID, revHistory) + // Construct removal body + // doc ID and rev ID aren't required to be inserted here, as both of those are available in the request. + bodyBytes = []byte(RemovedRedactedDocument) + return bodyBytes, history, activeChannels, true, isDelete, nil } @@ -1217,3 +1231,29 @@ func (doc *Document) MarshalWithXattr() (data []byte, xdata []byte, err error) { return data, xdata, nil } + +// channelsForRev returns the set of channels the given revision is in for the document +// Channel information is only stored for leaf nodes in the revision tree, as we don't keep full history of channel information +func (doc *Document) channelsForRev(revid string) (base.Set, bool) { + if revid == "" || doc.CurrentRev == revid { + return doc.currentRevChannels, true + } + + if rev, ok := doc.History[revid]; ok { + return rev.Channels, true + } + + // no rev + return nil, false +} + +// Returns a set of the current (winning revision's) channels for the document. +func (doc *Document) currentChannels() base.Set { + ch := base.SetOf() + for channelName, channelRemoval := range doc.Channels { + if channelRemoval == nil || channelRemoval.Seq == 0 { + ch.Add(channelName) + } + } + return ch +} diff --git a/db/revision_cache_interface.go b/db/revision_cache_interface.go index cd8ba32b39..31ff4d5ebe 100644 --- a/db/revision_cache_interface.go +++ b/db/revision_cache_interface.go @@ -283,7 +283,10 @@ func revCacheLoaderForDocument(ctx context.Context, backingStore RevisionCacheBa return bodyBytes, body, history, channels, removed, nil, deleted, nil, getHistoryErr } history = encodeRevisions(ctx, doc.ID, validatedHistory) - channels = doc.History[revid].Channels + + if revChannels, ok := doc.channelsForRev(revid); ok { + channels = revChannels + } return bodyBytes, body, history, channels, removed, attachments, deleted, doc.Expiry, err } diff --git a/db/revision_cache_test.go b/db/revision_cache_test.go index 1451d353d9..0ff4c23e07 100644 --- a/db/revision_cache_test.go +++ b/db/revision_cache_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/couchbase/sync_gateway/base" + "github.com/couchbase/sync_gateway/channels" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -47,9 +48,16 @@ func (t *testBackingStore) GetDocument(ctx context.Context, docid string, unmars doc.CurrentRev = "1-abc" doc.History = RevTree{ doc.CurrentRev: { - Channels: base.SetOf("*"), + ID: doc.CurrentRev, }, } + + doc.Channels = channels.ChannelMap{ + "*": &channels.ChannelRemoval{RevID: doc.CurrentRev}, + } + // currentRevChannels usually populated on JSON unmarshal + doc.currentRevChannels = doc.getCurrentChannels() + return doc, nil } diff --git a/db/revtree.go b/db/revtree.go index 08c2dc11bf..42367bd763 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -28,8 +28,8 @@ type RevInfo struct { BodyKey string // Used when revision body stored externally (doc key used for external storage) Deleted bool depth uint32 - Body []byte // Used when revision body stored inline (stores bodies) - Channels base.Set + Body []byte // Used when revision body stored inline (stores bodies) + Channels base.Set // Set only if the revision is a non-winning leaf revision (we don't store channel history per-revision) HasAttachments bool } @@ -44,22 +44,22 @@ type RevTree map[string]*RevInfo // rev IDs, with a parallel array of parent indexes. Ordering in the arrays doesn't matter. // So the parent of Revs[i] is Revs[Parents[i]] (unless Parents[i] == -1, which denotes a root.) type revTreeList struct { - Revs []string `json:"revs"` // The revision IDs - Parents []int `json:"parents"` // Index of parent of each revision (-1 if root) - Deleted []int `json:"deleted,omitempty"` // Indexes of revisions that are deletions - Bodies_Old []string `json:"bodies,omitempty"` // JSON of each revision (legacy) - BodyMap map[string]string `json:"bodymap,omitempty"` // JSON of each revision - BodyKeyMap map[string]string `json:"bodyKeyMap,omitempty"` // Keys of revision bodies stored in external documents - Channels []base.Set `json:"channels"` - HasAttachments []int `json:"hasAttachments,omitempty"` // Indexes of revisions that has attachments + Revs []string `json:"revs"` // The revision IDs + Parents []int `json:"parents"` // Index of parent of each revision (-1 if root) + Deleted []int `json:"deleted,omitempty"` // Indexes of revisions that are deletions + Bodies_Old []string `json:"bodies,omitempty"` // Deprecated: JSON of each revision (see bodymap) + BodyMap map[string]string `json:"bodymap,omitempty"` // JSON of each revision + BodyKeyMap map[string]string `json:"bodyKeyMap,omitempty"` // Keys of revision bodies stored in external documents + Channels_Old []base.Set `json:"channels,omitempty"` // Deprecated: Channels for revisions (see channelsMap) + ChannelsMap map[string]base.Set `json:"channelsMap,omitempty"` // Channels for non-winning leaf revisions (current channels stored outside of history, and don't have a use-case for storing non-leaf channels) + HasAttachments []int `json:"hasAttachments,omitempty"` // Indexes of revisions that has attachments } func (tree RevTree) MarshalJSON() ([]byte, error) { n := len(tree) rep := revTreeList{ - Revs: make([]string, n), - Parents: make([]int, n), - Channels: make([]base.Set, n), + Revs: make([]string, n), + Parents: make([]int, n), } revIndexes := map[string]int{"": -1} @@ -67,6 +67,7 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { for _, info := range tree { revIndexes[info.ID] = i rep.Revs[i] = info.ID + if info.Body != nil || info.BodyKey != "" { // Marshal either the BodyKey or the Body, depending on whether a BodyKey is specified if info.BodyKey == "" { @@ -81,19 +82,29 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { rep.BodyKeyMap[strconv.FormatInt(int64(i), 10)] = info.BodyKey } } - rep.Channels[i] = info.Channels + + // for non-winning leaf revisions we'll store channel information + if len(info.Channels) > 0 { + if rep.ChannelsMap == nil { + rep.ChannelsMap = make(map[string]base.Set, 1) + } + rep.ChannelsMap[strconv.FormatInt(int64(i), 10)] = info.Channels + } + if info.Deleted { if rep.Deleted == nil { rep.Deleted = make([]int, 0, 1) } rep.Deleted = append(rep.Deleted, i) } + if info.HasAttachments { if rep.HasAttachments == nil { rep.HasAttachments = make([]int, 0, 1) } rep.HasAttachments = append(rep.HasAttachments, i) } + i++ } @@ -126,8 +137,8 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { return } - // validate revTreeList revs, parents and channels lists are of equal length - if !(len(rep.Revs) == len(rep.Parents) && len(rep.Revs) == len(rep.Channels)) { + // validate revTreeList revs and parents lists are of equal length + if !(len(rep.Revs) == len(rep.Parents)) { return errors.New("revtreelist data is invalid, revs/parents/channels counts are inconsistent") } @@ -149,29 +160,51 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { info.BodyKey = bodyKey } } - if rep.Channels != nil { - info.Channels = rep.Channels[i] - } parentIndex := rep.Parents[i] if parentIndex >= 0 { info.Parent = rep.Revs[parentIndex] } (*tree)[revid] = &info } - if rep.Deleted != nil { - for _, i := range rep.Deleted { - info := (*tree)[rep.Revs[i]] - info.Deleted = true // because tree[rep.Revs[i]].Deleted=true is a compile error - (*tree)[rep.Revs[i]] = info + + for _, i := range rep.Deleted { + info := (*tree)[rep.Revs[i]] + info.Deleted = true // because tree[rep.Revs[i]].Deleted=true is a compile error + (*tree)[rep.Revs[i]] = info + } + + for _, i := range rep.HasAttachments { + info := (*tree)[rep.Revs[i]] + info.HasAttachments = true + (*tree)[rep.Revs[i]] = info + } + + for iStr, channels := range rep.ChannelsMap { + i, err := strconv.ParseInt(iStr, 10, 64) + if err != nil { + return err } + info := (*tree)[rep.Revs[i]] + info.Channels = channels } - if rep.HasAttachments != nil { - for _, i := range rep.HasAttachments { + + // we shouldn't be in a situation where we have both channels and channelsMap populated, but we still need to handle the old format + if len(rep.Channels_Old) > 0 { + winner, _, _ := tree.winningRevision(context.TODO()) + leaves := tree.Leaves() + for i, channels := range rep.Channels_Old { info := (*tree)[rep.Revs[i]] - info.HasAttachments = true - (*tree)[rep.Revs[i]] = info + // Ensure this leaf is not a winner (channels not stored in revtree for winning revision) + if _, isLeaf := leaves[info.ID]; isLeaf && info.ID != winner { + // set only if we've not already populated from ChannelsMap (we shouldn't ever have both) + if info.Channels != nil { + return fmt.Errorf("RevTree leaf %q had channels set already (from channelsMap)", info.ID) + } + info.Channels = channels + } } } + return } @@ -286,6 +319,15 @@ func (tree RevTree) GetLeavesFiltered(filter func(revId string) bool) []string { } +// Leaves returns a map of revisions that are leaves in the tree. +func (tree RevTree) Leaves() map[string]*RevInfo { + m := make(map[string]*RevInfo) + tree.forEachLeaf(func(info *RevInfo) { + m[info.ID] = info + }) + return m +} + func (tree RevTree) forEachLeaf(callback func(*RevInfo)) { isParent := map[string]bool{} for _, info := range tree { @@ -354,20 +396,20 @@ func (tree RevTree) findAncestorFromSet(revid string, ancestors []string) string } // Records a revision in a RevTree. -func (tree RevTree) addRevision(docid string, info RevInfo) (err error) { +func (tree RevTree) addRevision(docid string, info RevInfo) error { revid := info.ID if revid == "" { - err = errors.New(fmt.Sprintf("doc: %v, RevTree addRevision, empty revid is illegal", docid)) - return + return fmt.Errorf("doc: %v, RevTree addRevision, empty revid is illegal", docid) } if tree.contains(revid) { - err = errors.New(fmt.Sprintf("doc: %v, RevTree addRevision, already contains rev %q", docid, revid)) - return + return fmt.Errorf("doc: %v, RevTree addRevision, already contains rev %q", docid, revid) } - parent := info.Parent - if parent != "" && !tree.contains(parent) { - err = errors.New(fmt.Sprintf("doc: %v, RevTree addRevision, parent id %q is missing", docid, parent)) - return + if p := info.Parent; p != "" { + parent, ok := tree[p] + if !ok { + return fmt.Errorf("doc: %v, RevTree addRevision for rev %q, parent id %q is missing", docid, revid, p) + } + parent.Channels = nil } tree[revid] = &info return nil diff --git a/db/revtree_test.go b/db/revtree_test.go index 0b34f42e52..9aa0f8ebe8 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -19,26 +19,48 @@ import ( "time" "github.com/couchbase/sync_gateway/base" + "github.com/davecgh/go-spew/spew" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -// 1-one -- 2-two -- 3-three -var testmap = RevTree{"3-three": {ID: "3-three", Parent: "2-two", Body: []byte("{}")}, - "2-two": {ID: "2-two", Parent: "1-one", Channels: base.SetOf("ABC", "CBS")}, - "1-one": {ID: "1-one", Channels: base.SetOf("ABC")}} - -// / 3-three +// testmap is a revtree with a single branch // -// 1-one -- 2-two +// ┌────────┐ ┌────────┐ ╔════════╗ +// │ 1-one │──│ 2-two │──║3-three ║ +// └────────┘ └────────┘ ╚════════╝ +var testmap = RevTree{ + "3-three": {ID: "3-three", Parent: "2-two", Body: []byte(`{"foo":"bar"}`)}, + "2-two": {ID: "2-two", Parent: "1-one"}, + "1-one": {ID: "1-one"}, +} + +// branchymap is a branched revtree with two revisions in conflict // -// \ 3-drei -var branchymap = RevTree{"3-three": {ID: "3-three", Parent: "2-two"}, - "2-two": {ID: "2-two", Parent: "1-one"}, - "1-one": {ID: "1-one"}, - "3-drei": {ID: "3-drei", Parent: "2-two"}} +// ╔════════╗ +// ┌─║3-three ║ +// ┌────────┐ ┌────────┐ │ ╚════════╝ +// │ 1-one │──│ 2-two │─┤ +// └────────┘ └────────┘ │ ┌────────┐ +// └─│ 3-drei │ +// └────────┘ +var branchymap = RevTree{ + "3-three": {ID: "3-three", Parent: "2-two"}, // winner because higher ASCII value (d vs. t) + "2-two": {ID: "2-two", Parent: "1-one"}, + "1-one": {ID: "1-one"}, + "3-drei": {ID: "3-drei", Parent: "2-two"}, +} -var multiroot = RevTree{"3-a": {ID: "3-a", Parent: "2-a"}, +// multiroot is a revtree with multiple roots (disconnected branches) +// +// ┌────────┐ ┌────────┐ ╔════════╗ +// │ 1-a │─│ 2-a │─║ 3-a ║ +// └────────┘ └────────┘ ╚════════╝ +// ┌────────┐ ┌────────┐ +// │ 6-b │─│ 7-b │ +// └────────┘ └────────┘ +var multiroot = RevTree{ + "3-a": {ID: "3-a", Parent: "2-a"}, "2-a": {ID: "2-a", Parent: "1-a"}, "1-a": {ID: "1-a"}, "7-b": {ID: "7-b", Parent: "6-b"}, @@ -51,11 +73,20 @@ type BranchSpec struct { Digest string } -// / 3-a -- 4-a -- 5-a ...... etc (winning branch) -// 1-a -- 2-a -// \ 3-b -- 4-b ... etc (losing branch #1) -// \ 3-c -- 4-c ... etc (losing branch #2) -// \ 3-d -- 4-d ... etc (losing branch #n) +// getMultiBranchTestRevtree1 returns a rev tree with the specified number of revisions/branches +// +// ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ╔════════╗ +// │ 1-a │─│ 2-a │─┬──│ 3-a │─│ 4-a │─║ 5-a ║ +// └────────┘ └────────┘ │ └────────┘ └────────┘ ╚════════╝ +// │ ┌────────┐ ┌────────┐ +// ├──│ 3-b │─│ 4-b │ +// │ └────────┘ └────────┘ +// │ ┌────────┐ ┌────────┐ +// ├──│ 3-c │─│ 4-c │ +// │ └────────┘ └────────┘ +// │ ┌────────┐ ┌────────┐ +// └──│ 3-d │─│ 4-d │ +// └────────┘ └────────┘ // // NOTE: the 1-a -- 2-a unconflicted branch can be longer, depending on value of unconflictedBranchNumRevs func getMultiBranchTestRevtree1(ctx context.Context, unconflictedBranchNumRevs, winningBranchNumRevs int, losingBranches []BranchSpec) RevTree { @@ -157,11 +188,11 @@ func getMultiBranchTestRevtree1(ctx context.Context, unconflictedBranchNumRevs, } -func testUnmarshal(t *testing.T, jsonString string) RevTree { - gotmap := RevTree{} - assert.NoError(t, base.JSONUnmarshal([]byte(jsonString), &gotmap), "Couldn't parse RevTree from JSON") - assert.Equal(t, testmap, gotmap) - return gotmap +func assertRevTreeUnmarshal(t *testing.T, jsonString string, expectedRevtree RevTree) bool { + var gotmap RevTree + require.NoError(t, base.JSONUnmarshal([]byte(jsonString), &gotmap)) + t.Logf("Unmarshalled %s %v", jsonString, spew.Sprint(gotmap)) + return assert.Equal(t, expectedRevtree, gotmap) } // Make sure that the getMultiBranchTestRevtree1() helper works as expected @@ -188,29 +219,88 @@ func TestGetMultiBranchTestRevtree(t *testing.T) { } func TestRevTreeUnmarshalOldFormat(t *testing.T) { - const testJSON = `{"revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], "bodies": ["{}", "", ""], "channels": [null, ["ABC", "CBS"], ["ABC"]]}` - gotmap := testUnmarshal(t, testJSON) - fmt.Printf("Unmarshaled to %v\n", gotmap) + // 'channels' is an old revtree property that stored channel history for previous revisions. + // we moved non-winning leaf revision channel information into a 'channelMap' property instead to handle the case where documents are in conflict. + // bodies is an old revtree property that stored full bodies for previous revisions. + // we moved this into bodyMap for inline bodies and bodyKeyMap for externally stored bodies. + const testJSON = `{ + "revs": ["3-three", "2-two", "1-one"], + "parents": [1, 2, -1], + "bodies": ["{\"foo\":\"bar\"}", "", ""], + "channels": [["ABC", "CBS"], null, ["ABC"]] +}` + expected := testmap.copy() + + // winning rev channel information no longer present in revtree, + // expect to drop it at unmarshal time if it's still in the raw JSON + ri, err := expected.getInfo("3-three") + require.NoError(t, err) + ri.Channels = nil + + assertRevTreeUnmarshal(t, testJSON, expected) +} + +func TestRevTreeUnmarshalOldFormatNonWinningRev(t *testing.T) { + // we moved non-winning leaf revision channel information into a 'channelMap' property instead to handle the case where documents are in conflict. + const testJSON = `{ + "revs": ["3-drei", "3-three", "2-two", "1-one"], + "parents": [2, 2, 3, -1], + "bodies": ["{\"foo\":\"buzz\"}", "{\"foo\":\"bar\"}", "", ""], + "channels": [["DE"], ["EN"], null, ["ABC"]] +}` + expected := testmap.copy() + + // winning rev channel information no longer present in revtree, + // expect to drop it at unmarshal time if it's still in the raw JSON + ri, err := expected.getInfo("3-three") + require.NoError(t, err) + ri.Channels = nil + + // non-winning revisions do retain channel information, so populate this for the expected expected + err = expected.addRevision("", RevInfo{ + ID: "3-drei", + Parent: "2-two", + Body: []byte(`{"foo":"buzz"}`), + Channels: base.SetOf("DE"), + }) + require.NoError(t, err) + + assertRevTreeUnmarshal(t, testJSON, expected) } func TestRevTreeUnmarshal(t *testing.T) { - const testJSON = `{"revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], "bodymap": {"0":"{}"}, "channels": [null, ["ABC", "CBS"], ["ABC"]]}` - gotmap := testUnmarshal(t, testJSON) - fmt.Printf("Unmarshaled to %v\n", gotmap) + const testJSON = `{ + "revs": ["3-three", "2-two", "1-one"], + "parents": [1, 2, -1], + "bodymap": {"0":"{\"foo\":\"bar\"}"} +}` + assertRevTreeUnmarshal(t, testJSON, testmap) } -func TestRevTreeUnmarshalRevChannelCountMismatch(t *testing.T) { - const testJSON = `{"revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], "bodymap": {"0":"{}"}, "channels": [null, ["ABC", "CBS"]]}` +func TestRevTreeUnmarshalRevParentCountMismatch(t *testing.T) { + const testJSON = `{ + "revs": ["3-three", "2-two", "1-one"], + "parents": [1, -1], + "bodymap": {"0":"{}"} +}` gotmap := RevTree{} err := base.JSONUnmarshal([]byte(testJSON), &gotmap) assert.Errorf(t, err, "revtreelist data is invalid, revs/parents/channels counts are inconsistent") } -func TestRevTreeMarshal(t *testing.T) { - bytes, err := base.JSONMarshal(testmap) - assert.NoError(t, err, "Couldn't write RevTree to JSON") - fmt.Printf("Marshaled RevTree as %s\n", string(bytes)) - testUnmarshal(t, string(bytes)) +func TestRevTreeMarshalUnmarshalRoundtrip(t *testing.T) { + tests := map[string]RevTree{ + "testmap": testmap, + "branchymap": branchymap, + "multiroot": multiroot, + } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + bytes, err := base.JSONMarshal(test) + require.NoError(t, err) + assertRevTreeUnmarshal(t, string(bytes), test) + }) + } } func TestRevTreeAccess(t *testing.T) { @@ -279,7 +369,7 @@ func TestRevTreeAddRevisionWithMissingParent(t *testing.T) { assert.Equal(t, testmap, tempmap) err := tempmap.addRevision("testdoc", RevInfo{ID: "5-five", Parent: "4-four"}) - assert.Equal(t, fmt.Sprintf("doc: %v, RevTree addRevision, parent id %q is missing", "testdoc", "4-four"), err.Error()) + assert.Equal(t, fmt.Sprintf("doc: %v, RevTree addRevision for rev %q, parent id %q is missing", "testdoc", "5-five", "4-four"), err.Error()) } func TestRevTreeCompareRevIDs(t *testing.T) { @@ -291,6 +381,47 @@ func TestRevTreeCompareRevIDs(t *testing.T) { assert.Equal(t, 1, compareRevIDs(ctx, "5-bbb", "1-zzz")) } +// TestRevTreeChannelMapLeafOnly ensures that only non-winning leaf revisions populate channel information from/to channelMap. +func TestRevTreeChannelMapLeafOnly(t *testing.T) { + + // Add a 4th revision with channels to supersede rev 3-three + // ┌─────────────┐ ╔═══════════════════════╗ + // ┌─│3-three (EN) │──║ 4-four (EN-us, EN-gb) ║ + // ┌────────┐ ┌────────┐ │ └─────────────┘ ╚═══════════════════════╝ + // │ 1-one │──│ 2-two │─┤ + // └────────┘ └────────┘ │ ┌─────────────┐ + // └─│ 3-drei (DE) │ + // └─────────────┘ + tree := branchymap.copy() + + ri, err := tree.getInfo("3-three") + require.NoError(t, err) + // this wouldn't have been set normally (winning rev), + // but let's force it to ensure we're not storing old revs in ChannelsMap + ri.Channels = base.SetOf("EN") + + err = tree.addRevision(t.Name(), RevInfo{ + ID: "4-four", + Parent: "3-three", + Channels: nil, // we don't store channels for winning revs + }) + require.NoError(t, err) + + // insert channel into tree - we don't store it in the globals because each test requires different channel data. + ri, err = tree.getInfo("3-drei") + require.NoError(t, err) + ri.Channels = base.SetOf("DE") + + // marshal RevTree into storage format + bytes, err := base.JSONMarshal(tree) + require.NoError(t, err) + + // unmarshal back into revTreeList to ensure non-leaf channels are stripped on marshal for stored format + var storedMap revTreeList + require.NoError(t, base.JSONUnmarshal(bytes, &storedMap)) + assert.Len(t, storedMap.ChannelsMap, 1, "expected only one channelsMap entry (for the non-winning leaf revisions)") +} + func TestRevTreeIsLeaf(t *testing.T) { assert.True(t, branchymap.isLeaf("3-three"), "isLeaf failed on 3-three") assert.True(t, branchymap.isLeaf("3-drei"), "isLeaf failed on 3-drei") diff --git a/go.mod b/go.mod index 46921ec62f..4abb2ca950 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( github.com/couchbaselabs/go-fleecedelta v0.0.0-20220909152808-6d09efa7a338 github.com/couchbaselabs/gocbconnstr v1.0.5 github.com/couchbaselabs/rosmar v0.0.0-20240404180245-795e6df684f3 + github.com/davecgh/go-spew v1.1.1 github.com/elastic/gosigar v0.14.3 github.com/felixge/fgprof v0.9.3 github.com/google/uuid v1.6.0 @@ -58,8 +59,7 @@ require ( github.com/couchbase/tools-common/types v1.0.0 // indirect github.com/couchbase/tools-common/utils v1.0.0 // indirect github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20230515165046-68b522a21131 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/go-ole/go-ole v1.2.6 // indirect + github.com/go-ole/go-ole v1.3.0 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect diff --git a/go.sum b/go.sum index c59f8ba9ad..05ed403e99 100644 --- a/go.sum +++ b/go.sum @@ -51,8 +51,6 @@ github.com/couchbase/goprotostellar v1.0.1 h1:mtDVYTgnnDSQ3t7mQRG6jl/tOXKOuuFM9P github.com/couchbase/goprotostellar v1.0.1/go.mod h1:gs1eioLVOHETTFWxDY4v7Q/kRPMgqmX6t/TPcI429ls= github.com/couchbase/goutils v0.1.2 h1:gWr8B6XNWPIhfalHNog3qQKfGiYyh4K4VhO3P2o9BCs= github.com/couchbase/goutils v0.1.2/go.mod h1:h89Ek/tiOxxqjz30nPPlwZdQbdB8BwgnuBxeoUe/ViE= -github.com/couchbase/sg-bucket v0.0.0-20240326230241-0b197e169b27 h1:FGNvJsAQk6JZzuVXvoLXcoSQzOnQxWkywzYJFQqzXEg= -github.com/couchbase/sg-bucket v0.0.0-20240326230241-0b197e169b27/go.mod h1:5me3TJLTPfR0s3aMJZcPoTu5FT8oelaINz5l7Q3cApE= github.com/couchbase/sg-bucket v0.0.0-20240402154301-12625d8851a8 h1:kfWMYvUgSg2yIZJx+t63Ucl+zorvFqlYayXPkiXFtSE= github.com/couchbase/sg-bucket v0.0.0-20240402154301-12625d8851a8/go.mod h1:5me3TJLTPfR0s3aMJZcPoTu5FT8oelaINz5l7Q3cApE= github.com/couchbase/tools-common/cloud v1.0.0 h1:SQZIccXoedbrThehc/r9BJbpi/JhwJ8X00PDjZ2gEBE= @@ -74,8 +72,6 @@ github.com/couchbaselabs/gocbconnstr v1.0.5 h1:e0JokB5qbcz7rfnxEhNRTKz8q1svoRvDo github.com/couchbaselabs/gocbconnstr v1.0.5/go.mod h1:KV3fnIKMi8/AzX0O9zOrO9rofEqrRF1d2rG7qqjxC7o= github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20230515165046-68b522a21131 h1:2EAfFswAfgYn3a05DVcegiw6DgMgn1Mv5eGz6IHt1Cw= github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20230515165046-68b522a21131/go.mod h1:o7T431UOfFVHDNvMBUmUxpHnhivwv7BziUao/nMl81E= -github.com/couchbaselabs/rosmar v0.0.0-20240326232309-04dfb3337b60 h1:w9E8CEvQia8BPA+2Ai6dJh64wYTmxNUrXNPkKhtPpGw= -github.com/couchbaselabs/rosmar v0.0.0-20240326232309-04dfb3337b60/go.mod h1:MnlZ8BXE9Z7rUQEyb069P/6E9+YVkUxcqW5cmN23h0I= github.com/couchbaselabs/rosmar v0.0.0-20240404180245-795e6df684f3 h1:AUvojYsPc2WgiO9xRalRQLyXzooRRWemdEkiGl+PZno= github.com/couchbaselabs/rosmar v0.0.0-20240404180245-795e6df684f3/go.mod h1:SM0w4YHwXFMIyfqUbkpXZNWwAQKLwsUH91fsKUooMqw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -91,8 +87,9 @@ github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g= github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= +github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= diff --git a/rest/access_test.go b/rest/access_test.go index 8abbee7a55..62e033a7b5 100644 --- a/rest/access_test.go +++ b/rest/access_test.go @@ -716,7 +716,7 @@ func TestAllDocsAccessControl(t *testing.T) { allDocsResult = allDocsResponse{} err = base.JSONUnmarshal(response.Body.Bytes(), &allDocsResult) require.NoError(t, err) - require.Len(t, allDocsResult.Rows, 3) + require.Len(t, allDocsResult.Rows, 3) // FIXME (bbrks): Forbidden/403 with revcache changes assert.Equal(t, "doc3", allDocsResult.Rows[0].ID) assert.Equal(t, "doc4", allDocsResult.Rows[1].ID) assert.Equal(t, "doc5", allDocsResult.Rows[2].ID) diff --git a/rest/adminapitest/admin_api_test.go b/rest/adminapitest/admin_api_test.go index 601e8a3012..39f6ee0566 100644 --- a/rest/adminapitest/admin_api_test.go +++ b/rest/adminapitest/admin_api_test.go @@ -2096,12 +2096,15 @@ func TestRawRedaction(t *testing.T) { // Test redact being disabled by default res = rt.SendAdminRequest("GET", "/{{.keyspace}}/_raw/testdoc", ``) + bodyBytes := res.Body.Bytes() + t.Logf("body: %s", string(bodyBytes)) var body map[string]interface{} - err := base.JSONUnmarshal(res.Body.Bytes(), &body) + err := base.JSONUnmarshal(bodyBytes, &body) assert.NoError(t, err) syncData := body[base.SyncPropertyName] assert.Equal(t, map[string]interface{}{"achannel": nil}, syncData.(map[string]interface{})["channels"]) - assert.Equal(t, []interface{}{[]interface{}{"achannel"}}, syncData.(map[string]interface{})["history"].(map[string]interface{})["channels"]) + // active revision for a doc that isn't in conflict, so don't expect a channelsMap + assert.Nil(t, syncData.(map[string]interface{})["channelsMap"]) // Test redacted body = map[string]interface{}{} @@ -2111,7 +2114,8 @@ func TestRawRedaction(t *testing.T) { syncData = body[base.SyncPropertyName] require.NotNil(t, syncData) assert.NotEqual(t, map[string]interface{}{"achannel": nil}, syncData.(map[string]interface{})["channels"]) - assert.NotEqual(t, []interface{}{[]interface{}{"achannel"}}, syncData.(map[string]interface{})["history"].(map[string]interface{})["channels"]) + // active revision for a doc that isn't in conflict, so don't expect a channelsMap + assert.Nil(t, syncData.(map[string]interface{})["channelsMap"]) // Test include doc false doesn't return doc body = map[string]interface{}{}