Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CBG-3203: Don't store channel history for non-leaf revisions #6368

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
16fffbb
Don't store channel history for non-leaf revisions
bbrks Jul 25, 2023
10507a7
Extend RevTree unmarshal test for more cases (conflicting revs with c…
bbrks Jul 26, 2023
44b8fac
Add failing test for non-leaf revisions containing channels
bbrks Jul 26, 2023
dc69d8b
Fix godoc describing wrong revtree winner due to inverse ASCII compar…
bbrks Jul 26, 2023
15ef848
Only populate channels for leaf revisions on marshal/unmarshal
bbrks Jul 26, 2023
058ce82
Keep Channels_Old for backwards compatibility (reading existing marsh…
bbrks Jul 27, 2023
8b23e1e
wip
bbrks Aug 8, 2023
5ce9e15
Build activeChannels from IsChannelRemoval based on sync.Channels
bbrks Aug 15, 2023
f32d938
goimports
bbrks Aug 15, 2023
8114336
Remove leaf check from RevTree Unmarshal (assume JSON is always corre…
bbrks Sep 12, 2023
1ae9ded
Cleanup addRevision error formatting
bbrks Sep 13, 2023
8f70fdd
Skip leaf check when storing for ChannelMap - nil out parent channels…
bbrks Sep 13, 2023
84ad764
Change TestRevTreeChannelMapLeafOnly assertion to non-winning leaf only
bbrks Sep 13, 2023
dc8985e
Skip ChannelsMap storage on Marshal for the winning revision (it's st…
bbrks Sep 13, 2023
4c8d9ed
Move RevTree channel info into tests
bbrks Sep 13, 2023
12da452
Change now false assertion in TestSyncFnOnPush
bbrks Sep 13, 2023
9359d53
Handle winning revision logic in authorizeDoc
bbrks Sep 13, 2023
29969e4
Make revCacheLoaderForDocument work for winning revision channels not…
bbrks Sep 13, 2023
eaec520
Fix missing context
bbrks Sep 14, 2023
3bfaaea
Store set of currentRevChannels on Document at unmarshal time
bbrks Sep 18, 2023
220c275
Move currentRevChannels logic down into SyncData type
bbrks Nov 2, 2023
52495bc
Remove old FIXMEs
bbrks Nov 3, 2023
fd3e91a
simplify
bbrks Nov 3, 2023
eacbe39
Address PR comments (move logic into Unmarshal only - and don't set c…
bbrks Nov 7, 2023
25c2fab
Return 404 from authorizeDoc when an uncached non-leaf rev is requested
bbrks Nov 21, 2023
1c0e4cd
use errors.Is instead of ==
bbrks Nov 21, 2023
5698d3e
Fix duplicate UnmarshalJSON func for SyncData
bbrks Jan 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion db/blip_sync_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response
sentSeqs = append(sentSeqs, seq)
}
} else {
base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s / %s (seq:%v)", base.UD(docID), revID, seq)
base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s/%s (seq:%v)", base.UD(docID), revID, seq)
if collectionCtx.sgr2PushAlreadyKnownSeqsCallback != nil {
alreadyKnownSeqs = append(alreadyKnownSeqs, seq)
}
Expand Down
40 changes: 24 additions & 16 deletions db/crud.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package db
import (
"bytes"
"context"
"errors"
"fmt"
"math"
"net/http"
Expand All @@ -21,7 +22,7 @@ import (
"github.com/couchbase/sync_gateway/auth"
"github.com/couchbase/sync_gateway/base"
"github.com/couchbase/sync_gateway/channels"
"github.com/pkg/errors"
pkgerrors "github.com/pkg/errors"
)

const (
Expand Down Expand Up @@ -529,21 +530,23 @@ func (db *DatabaseCollectionWithUser) Get1xRevAndChannels(ctx context.Context, d
}

// Returns an HTTP 403 error if the User is not allowed to access any of this revision's channels.
func (col *DatabaseCollectionWithUser) authorizeDoc(doc *Document, revid string) error {
func (col *DatabaseCollectionWithUser) authorizeDoc(ctx context.Context, doc *Document, revid string) error {
user := col.user
if doc == nil || user == nil {
return nil // A nil User means access control is disabled
}
if revid == "" {
revid = doc.CurrentRev
}
if rev := doc.History[revid]; rev != nil {
// Authenticate against specific revision:
return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, rev.Channels)
} else {
// No such revision; let the caller proceed and return a 404

channelsForRev, ok := doc.channelsForRev(revid)
bbrks marked this conversation as resolved.
Show resolved Hide resolved
if !ok {
// No such revision
// let the caller proceed and return a 404
return nil
} else if channelsForRev == nil {
// non-leaf (no channel info) - force 404 (caller would find the rev if it tried to look)
return ErrMissing
}

return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, channelsForRev)
}

// Gets a revision of a document. If it's obsolete it will be loaded from the database if possible.
Expand Down Expand Up @@ -681,14 +684,14 @@ func (db *DatabaseCollectionWithUser) getAncestorJSON(ctx context.Context, doc *
// instead returns a minimal deletion or removal revision to let them know it's gone.
func (db *DatabaseCollectionWithUser) get1xRevFromDoc(ctx context.Context, doc *Document, revid string, listRevisions bool) (bodyBytes []byte, removed bool, err error) {
var attachments AttachmentsMeta
if err := db.authorizeDoc(doc, revid); err != nil {
if err := db.authorizeDoc(ctx, doc, revid); err != nil {
// As a special case, you don't need channel access to see a deletion revision,
// otherwise the client's replicator can't process the deletion (since deletions
// usually aren't on any channels at all!) But don't show the full body. (See #59)
// Update: this applies to non-deletions too, since the client may have lost access to
// the channel and gotten a "removed" entry in the _changes feed. It then needs to
// incorporate that tombstone and for that it needs to see the _revisions property.
if revid == "" || doc.History[revid] == nil {
if revid == "" || doc.History[revid] == nil || errors.Is(err, ErrMissing) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the comment on line 700, it seems like we want to bypass the security check for tombstones or removals (so that we successfully set bodyBytes to either EmptyDocument or RemovedRedactedDocument below). I'm wondering if the ErrMissing handling is going to break this, in particular for removals. I went looking for a test that covers this path - TestGetRemovedAsUser seems close but I don't think it purges the removal from the rev cache before attempting to retrieve. I think it would be good to extend TestGetRemovedAsUser for this scenario to see whether it changes before/after this PR:

  • removal revision is non-leaf
  • removal revision is not resident in the rev cache
  • verify removal is still returned

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same might be true for tombstones, although non-leaf tombstones are more of a corner case. It would be common for removals to be non-leaf revisions.

Copy link
Member Author

@bbrks bbrks Nov 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My interpretation of the comment was that an old rev is neither removed from a channel, nor a tombstone. Is that incorrect?

An old revision we don't have channel info for any more seems the same as somebody requesting a pruned revision, or doc1 / 123-invalid which would return ErrMissing.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking of the usage of IsChannelRemoval in revCacheLoaderForDocument, where we check the channel history to identify removals for revisions that aren't in the rev tree, and return a removal instead of 404 for those. I wasn't clear about the interaction (if any) between that handling and the changes here.

Copy link
Member Author

@bbrks bbrks Nov 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed, will follow up and get test coverage for a removal that isn't cached and ensure behaviour doesn't change with this PR.

return nil, false, err
}
if doc.History[revid].Deleted {
Expand Down Expand Up @@ -1557,7 +1560,7 @@ func (db *DatabaseCollectionWithUser) addAttachments(ctx context.Context, newAtt
if errors.Is(err, ErrAttachmentTooLarge) || err.Error() == "document value was too large" {
err = base.HTTPErrorf(http.StatusRequestEntityTooLarge, "Attachment too large")
} else {
err = errors.Wrap(err, "Error adding attachment")
err = pkgerrors.Wrap(err, "Error adding attachment")
}
}
return err
Expand Down Expand Up @@ -1754,7 +1757,8 @@ func (col *DatabaseCollectionWithUser) documentUpdateFunc(ctx context.Context, d
return
}

if len(channelSet) > 0 {
isWinningRev := doc.CurrentRev == newRevID
if len(channelSet) > 0 && !isWinningRev {
doc.History[newRevID].Channels = channelSet
}

Expand All @@ -1781,7 +1785,7 @@ func (col *DatabaseCollectionWithUser) documentUpdateFunc(ctx context.Context, d
return
}
}
_, err = doc.updateChannels(ctx, channelSet)
_, err = doc.updateChannels(ctx, isWinningRev, channelSet)
if err != nil {
return
}
Expand Down Expand Up @@ -2010,7 +2014,11 @@ func (db *DatabaseCollectionWithUser) updateAndReturnDoc(ctx context.Context, do
return nil, "", err
}

revChannels := doc.History[newRevID].Channels
revChannels, ok := doc.channelsForRev(newRevID)
if !ok {
// Should be unreachable, as we've already checked History[newRevID] above ...
return nil, "", base.RedactErrorf("unable to determine channels for %s/%s", base.UD(docid), newRevID)
}
documentRevision := DocumentRevision{
DocID: docid,
RevID: newRevID,
Expand Down
8 changes: 5 additions & 3 deletions db/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -1734,9 +1734,11 @@ func (db *DatabaseCollectionWithUser) getResyncedDocument(ctx context.Context, d
access = nil
channels = nil
}
rev.Channels = channels

if rev.ID == doc.CurrentRev {
isWinningRev := rev.ID == doc.CurrentRev
if !isWinningRev {
rev.Channels = channels
} else {
if regenerateSequences {
updatedUnusedSequences, err = db.assignSequence(ctx, 0, doc, unusedSequences)
if err != nil {
Expand All @@ -1745,7 +1747,7 @@ func (db *DatabaseCollectionWithUser) getResyncedDocument(ctx context.Context, d
forceUpdate = true
}

changedChannels, err := doc.updateChannels(ctx, channels)
changedChannels, err := doc.updateChannels(ctx, isWinningRev, channels)
changed = len(doc.Access.updateAccess(ctx, doc, access)) +
len(doc.RoleAccess.updateAccess(ctx, doc, roles)) +
len(changedChannels)
Expand Down
71 changes: 69 additions & 2 deletions db/database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,70 @@ func TestIsServerless(t *testing.T) {
}
}

// TestUncachedOldRevisionChannel verifies how a non-leaf revision is served to a
// channel-restricted user before and after the revision cache is flushed:
//   - while the old revision is still resident in the revision cache, its body
//     (including channel data) is returned;
//   - once the cache has been flushed, the request for the same non-leaf
//     revision is expected to fail with an HTTP 404.
func TestUncachedOldRevisionChannel(t *testing.T) {
	db, ctx := setupTestDB(t)
	defer db.Close(ctx)

	col := GetSingleDatabaseCollectionWithUser(t, db)
	col.ChannelMapper = channels.NewChannelMapper(ctx, channels.DocChannelsSyncFunction, db.Options.JavascriptTimeout)

	// Run the remainder of the test as a user that only has access to channel ABC.
	authenticator := db.Authenticator(base.TestCtx(t))
	alice, err := authenticator.NewUser("alice", "pass", base.SetOf("ABC"))
	require.NoError(t, err, "Error creating user")
	col.user = alice

	// put writes the given body as a revision of doc1 and returns the new revision ID.
	put := func(revBody Body) string {
		revID, _, putErr := col.Put(ctx, "doc1", revBody)
		require.NoError(t, putErr, "Error creating doc")
		return revID
	}

	// Build a linear three-revision history, every revision in channel ABC.
	firstRevID := put(Body{
		"k1":       "v1",
		"channels": []string{"ABC"},
	})
	secondRevID := put(Body{
		"k2":       "v2",
		"channels": []string{"ABC"},
		BodyRev:    firstRevID,
	})
	thirdRevID := put(Body{
		"k3":       "v3",
		"channels": []string{"ABC"},
		BodyRev:    secondRevID,
	})
	require.NotEmpty(t, thirdRevID, "Error creating doc")

	// Fetch the (now non-leaf) second revision while it is still cached.
	gotBody, err := col.Get1xRevBody(ctx, "doc1", secondRevID, true, nil)
	require.NoError(t, err, "Error getting 1x rev body")

	// The cached copy of the old revision still carries its channel information.
	_, firstDigest := ParseRevID(ctx, firstRevID)
	_, secondDigest := ParseRevID(ctx, secondRevID)
	wantBody := Body{
		"k2":       "v2",
		"channels": []string{"ABC"},
		BodyRevisions: Revisions{
			RevisionsStart: 2,
			RevisionsIds:   []string{secondDigest, firstDigest},
		},
		BodyId:  "doc1",
		BodyRev: secondRevID,
	}
	require.Equal(t, wantBody, gotBody)

	// Drop everything from the revision cache so the next read must go back to
	// the stored document / backup revision.
	col.FlushRevisionCacheForTest()

	// With the cache empty the non-leaf revision no longer has channel
	// information available, so the request is answered with a 404.
	_, _, _, _, _, _, _, _, err = col.Get1xRevAndChannels(ctx, "doc1", secondRevID, false)
	assertHTTPError(t, err, 404)
}

// Test removal handling for unavailable multi-channel revisions.
func TestGetRemovalMultiChannel(t *testing.T) {
db, ctx := setupTestDB(t)
Expand Down Expand Up @@ -691,7 +755,7 @@ func TestDeltaSyncWhenToRevIsChannelRemoval(t *testing.T) {
require.NoError(t, db.DbStats.InitDeltaSyncStats())

delta, redactedRev, err = collection.GetDelta(ctx, "doc1", rev1ID, rev2ID)
require.Equal(t, base.HTTPErrorf(404, "missing"), err)
assert.Equal(t, base.HTTPErrorf(404, "missing"), err)
assert.Nil(t, delta)
assert.Nil(t, redactedRev)
}
Expand Down Expand Up @@ -1464,7 +1528,10 @@ func TestSyncFnOnPush(t *testing.T) {
"public": &channels.ChannelRemoval{Seq: 2, RevID: "4-four"},
}, doc.Channels)

assert.Equal(t, base.SetOf("clibup"), doc.History["4-four"].Channels)
// We no longer store channels for the winning revision in the RevTree,
// so don't expect it to be in doc.History like it used to be...
// The above assertion ensured the doc was *actually* in the correct channel.
assert.Nil(t, doc.History["4-four"].Channels)
}

func TestInvalidChannel(t *testing.T) {
Expand Down
66 changes: 53 additions & 13 deletions db/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,19 @@

addedRevisionBodies []string // revIDs of non-winning revision bodies that have been added (and so require persistence)
removedRevisionBodyKeys map[string]string // keys of non-winning revisions that have been removed (and so may require deletion), indexed by revID

currentRevChannels base.Set // A base.Set of the current revision's channels (determined by SyncData.Channels at UnmarshalJSON time)
}

// getCurrentChannels derives the set of channels that are currently active from
// the entries of sd.Channels. An entry counts as active when it has no removal
// record (nil) or when its removal record has a zero Seq; every other entry is
// left out of the result. The returned set is always non-nil.
func (sd *SyncData) getCurrentChannels() base.Set {
	current := base.SetOf()
	for name, removal := range sd.Channels {
		if removal != nil && removal.Seq != 0 {
			// Entry carries a removal sequence, so it is not a current channel.
			continue
		}
		current.Add(name)
	}
	return current
}

func (sd *SyncData) HashRedact(salt string) SyncData {
Expand Down Expand Up @@ -181,6 +194,8 @@
RevID string
DocAttachments AttachmentsMeta
inlineSyncData bool

currentRevChannels base.Set // A base.Set of the current revision's channels (determined by SyncData.Channels at UnmarshalJSON time)
}

type historyOnlySyncData struct {
Expand Down Expand Up @@ -940,7 +955,7 @@

// Updates the Channels property of a document object with current & past channels.
// Returns the set of channels that have changed (document joined or left in this revision)
func (doc *Document) updateChannels(ctx context.Context, newChannels base.Set) (changedChannels base.Set, err error) {
func (doc *Document) updateChannels(ctx context.Context, isWinningRev bool, newChannels base.Set) (changedChannels base.Set, err error) {
var changed []string
oldChannels := doc.Channels
if oldChannels == nil {
Expand Down Expand Up @@ -969,6 +984,9 @@
doc.updateChannelHistory(channel, doc.Sequence, true)
}
}
if isWinningRev {
doc.SyncData.currentRevChannels = newChannels
}
if changed != nil {
base.InfofCtx(ctx, base.KeyCRUD, "\tDoc %q / %q in channels %q", base.UD(doc.ID), doc.CurrentRev, base.UD(newChannels))
changedChannels, err = channels.SetFromArray(changed, channels.KeepStar)
Expand All @@ -981,6 +999,7 @@
// Set of channels returned from IsChannelRemoval are "Active" channels and NOT "Removed".
func (doc *Document) IsChannelRemoval(ctx context.Context, revID string) (bodyBytes []byte, history Revisions, channels base.Set, isRemoval bool, isDelete bool, err error) {

activeChannels := make(base.Set)
removedChannels := make(base.Set)

// Iterate over the document's channel history, looking for channels that were removed at revID. If found, also identify whether the removal was a tombstone.
Expand All @@ -990,25 +1009,16 @@
if removal.Deleted == true {
isDelete = true
}
} else {
activeChannels[channel] = struct{}{}
}
}

// If no matches found, return isRemoval=false
if len(removedChannels) == 0 {
return nil, nil, nil, false, false, nil
}

// Construct removal body
// doc ID and rev ID aren't required to be inserted here, as both of those are available in the request.
bodyBytes = []byte(RemovedRedactedDocument)

activeChannels := make(base.Set)
// Add active channels to the channel set if the the revision is available in the revision tree.
if revInfo, ok := doc.History[revID]; ok {
for channel, _ := range revInfo.Channels {
activeChannels[channel] = struct{}{}
}
}

// Build revision history for revID
revHistory, err := doc.History.getHistory(revID)
if err != nil {
Expand All @@ -1021,6 +1031,10 @@
}
history = encodeRevisions(ctx, doc.ID, revHistory)

// Construct removal body
// doc ID and rev ID aren't required to be inserted here, as both of those are available in the request.
bodyBytes = []byte(RemovedRedactedDocument)

return bodyBytes, history, activeChannels, true, isDelete, nil
}

Expand Down Expand Up @@ -1217,3 +1231,29 @@

return data, xdata, nil
}

// channelsForRev returns the set of channels the given revision is in for the document.
// Channel information is only stored for leaf nodes in the revision tree, as we don't keep full history of channel information.
//
// An empty revid is treated as a request for the document's current revision,
// whose channels are taken from doc.currentRevChannels rather than the revision tree.
//
// The boolean result reports whether the revision is known to the document:
//   - (channels, true) for the current revision or any revision present in
//     doc.History; channels may be nil when the tree entry has none recorded.
//   - (nil, false) when doc.History has no usable entry for revid. A nil entry is
//     treated the same as a missing one.
func (doc *Document) channelsForRev(revid string) (base.Set, bool) {
	if revid == "" || doc.CurrentRev == revid {
		return doc.currentRevChannels, true
	}

	// doc.History stores pointers; guard against a nil entry so that reading
	// rev.Channels cannot panic with a nil-pointer dereference.
	if rev, ok := doc.History[revid]; ok && rev != nil {
		return rev.Channels, true
	}

	// no rev
	return nil, false
}

// Returns a set of the current (winning revision's) channels for the document.
func (doc *Document) currentChannels() base.Set {

Check failure on line 1251 in db/document.go

View workflow job for this annotation

GitHub Actions / lint

func `(*Document).currentChannels` is unused (unused)

Check failure on line 1251 in db/document.go

View workflow job for this annotation

GitHub Actions / lint

func `(*Document).currentChannels` is unused (unused)
ch := base.SetOf()
for channelName, channelRemoval := range doc.Channels {
if channelRemoval == nil || channelRemoval.Seq == 0 {
ch.Add(channelName)
}
}
return ch
}
5 changes: 4 additions & 1 deletion db/revision_cache_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,10 @@ func revCacheLoaderForDocument(ctx context.Context, backingStore RevisionCacheBa
return bodyBytes, body, history, channels, removed, nil, deleted, nil, getHistoryErr
}
history = encodeRevisions(ctx, doc.ID, validatedHistory)
channels = doc.History[revid].Channels

if revChannels, ok := doc.channelsForRev(revid); ok {
channels = revChannels
}

return bodyBytes, body, history, channels, removed, attachments, deleted, doc.Expiry, err
}
10 changes: 9 additions & 1 deletion db/revision_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"testing"

"github.com/couchbase/sync_gateway/base"
"github.com/couchbase/sync_gateway/channels"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -47,9 +48,16 @@ func (t *testBackingStore) GetDocument(ctx context.Context, docid string, unmars
doc.CurrentRev = "1-abc"
doc.History = RevTree{
doc.CurrentRev: {
Channels: base.SetOf("*"),
ID: doc.CurrentRev,
},
}

doc.Channels = channels.ChannelMap{
"*": &channels.ChannelRemoval{RevID: doc.CurrentRev},
}
// currentRevChannels usually populated on JSON unmarshal
doc.currentRevChannels = doc.getCurrentChannels()

return doc, nil
}

Expand Down
Loading
Loading