Skip to content

Commit

Permalink
feat: improve "InsertIfAbsent" to update the same document when a new…
Browse files Browse the repository at this point in the history
… field is introduced

- Added a new test function `TestBatch_InsertAndUpdateContent` to verify the update of a document with new content.
- Ensured that the document count remains the same after updating the document.
- Verified the updated content field in the document.

Signed-off-by: Gao Hongtao <[email protected]>
  • Loading branch information
hanahmily committed Dec 2, 2024
1 parent c317df1 commit 7db485c
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 9 deletions.
4 changes: 3 additions & 1 deletion index/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type Batch struct {
ids []segment.Term
unparsedDocuments []segment.Document
unparsedIDs []segment.Term
fieldNames [][]string
persistedCallback func(error)
}

Expand All @@ -32,9 +33,10 @@ func (b *Batch) Insert(doc segment.Document) {
b.documents = append(b.documents, doc)
}

func (b *Batch) InsertIfAbsent(id segment.Term, doc segment.Document) {
// InsertIfAbsent queues doc, keyed by id, on the batch's "unparsed" lists.
//
// The id, the field names and the document are appended to three parallel
// slices, so the entry at a given index in each slice belongs to the same
// call. fieldNames is stored as passed (the slice is not copied).
//
// NOTE(review): the writer appears to use fieldNames to decide whether an
// already-present id should still be written (when one of the named fields
// is not yet known to the segment) — confirm against removeExistingDocuments.
func (b *Batch) InsertIfAbsent(id segment.Term, fieldNames []string, doc segment.Document) {
	b.unparsedIDs = append(b.unparsedIDs, id)
	b.fieldNames = append(b.fieldNames, fieldNames)
	b.unparsedDocuments = append(b.unparsedDocuments, doc)
}

func (b *Batch) Update(id segment.Term, doc segment.Document) {
Expand Down
27 changes: 21 additions & 6 deletions index/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,15 +306,30 @@ func (s *Writer) removeExistingDocuments(batch *Batch) error {
return err
}

OUTER:
for i := 0; i < len(batch.unparsedIDs); i++ {
if ok, _ := dict.Contains(batch.unparsedIDs[i].Term()); ok {
batch.unparsedDocuments = append(batch.unparsedDocuments[:i], batch.unparsedDocuments[i+1:]...)
batch.unparsedIDs = append(batch.unparsedIDs[:i], batch.unparsedIDs[i+1:]...)
i--
if len(batch.unparsedDocuments) == 0 {
return nil
if ok, _ := dict.Contains(batch.unparsedIDs[i].Term()); !ok {
continue
}
fn := batch.fieldNames[i]
if len(fn) > 0 {
INNER:
for i := range fn {
for _, field := range seg.segment.Fields() {
if field == fn[i] {
continue INNER
}
}
// field not found in segment
continue OUTER
}
}
batch.unparsedDocuments = append(batch.unparsedDocuments[:i], batch.unparsedDocuments[i+1:]...)
batch.unparsedIDs = append(batch.unparsedIDs[:i], batch.unparsedIDs[i+1:]...)
i--
if len(batch.unparsedDocuments) == 0 {
return nil
}
}
}
if len(batch.unparsedDocuments) > 0 {
Expand Down
109 changes: 107 additions & 2 deletions index/writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1678,7 +1678,7 @@ func TestBatch_InsertIfAbsent(t *testing.T) {
NewFakeField("title", "mister", false, false, true),
}
batch := NewBatch()
batch.InsertIfAbsent(testIdentifier(docID), doc)
batch.InsertIfAbsent(testIdentifier(docID), []string{"title"}, doc)

// Apply the batch
if err := idx.Batch(batch); err != nil {
Expand Down Expand Up @@ -1709,7 +1709,7 @@ func TestBatch_InsertIfAbsent(t *testing.T) {
NewFakeField("title", "mister2", true, false, true),
}
batchDuplicate := NewBatch()
batchDuplicate.InsertIfAbsent(testIdentifier(docID), docDuplicate)
batchDuplicate.InsertIfAbsent(testIdentifier(docID), []string{"title"}, docDuplicate)

// Apply the duplicate batch
if err := idx.Batch(batchDuplicate); err != nil {
Expand Down Expand Up @@ -1767,3 +1767,108 @@ func TestBatch_InsertIfAbsent(t *testing.T) {
t.Fatal(err)
}
}

// TestBatch_InsertAndUpdateContent writes a document through InsertIfAbsent,
// then submits the same ID again with an additional "content" field. It checks
// that the reader still reports a single document and that the "content"
// stored field, when visited, carries the new text.
func TestBatch_InsertAndUpdateContent(t *testing.T) {
	cfg, cleanup := CreateConfig("TestBatch_InsertAndUpdateContent")
	defer func() {
		// Cleanup failures are only logged; they do not fail the test.
		if cleanupErr := cleanup(); cleanupErr != nil {
			t.Log(cleanupErr)
		}
	}()

	idx, err := OpenWriter(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// Step 1: first write of the document, carrying only a title.
	docID := "doc-1"
	initialDoc := &FakeDocument{
		NewFakeField("_id", docID, true, false, false),
		NewFakeField("title", "mister", false, false, true),
	}
	insertBatch := NewBatch()
	insertBatch.InsertIfAbsent(testIdentifier(docID), []string{"title"}, initialDoc)
	if applyErr := idx.Batch(insertBatch); applyErr != nil {
		t.Fatalf("failed to apply batch: %v", applyErr)
	}

	// Exactly one document has been written so far; the second batch below
	// must not change this number.
	expectedCount := uint64(1)

	// Step 2: the index must now hold exactly one document.
	firstReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	countAfterInsert, err := firstReader.Count()
	if err != nil {
		t.Error(err)
	}
	if countAfterInsert != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, countAfterInsert)
	}
	if closeErr := firstReader.Close(); closeErr != nil {
		t.Fatal(closeErr)
	}

	// Step 3: same ID again, this time with a field name ("content") that the
	// first write did not include.
	enrichedDoc := &FakeDocument{
		NewFakeField("_id", docID, true, false, false),
		NewFakeField("title", "mister", false, false, true),
		NewFakeField("content", "updated content", false, false, true),
	}
	updateBatch := NewBatch()
	updateBatch.InsertIfAbsent(testIdentifier(docID), []string{"title", "content"}, enrichedDoc)
	if applyErr := idx.Batch(updateBatch); applyErr != nil {
		t.Fatalf("failed to apply update batch: %v", applyErr)
	}

	// Step 4: the document count must be unchanged by the second batch.
	secondReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	countAfterUpdate, err := secondReader.Count()
	if err != nil {
		t.Error(err)
	}
	if countAfterUpdate != expectedCount {
		t.Errorf("Expected document count to be %d after update, got %d", expectedCount, countAfterUpdate)
	}

	docNum, err := findNumberByID(secondReader, docID)
	if err != nil {
		t.Fatal(err)
	}

	// Step 5: inspect the stored fields of the surviving document.
	// NOTE(review): this only fails when a "content" field is visited with the
	// wrong value; if "content" is never visited the check passes silently.
	// Confirm the field is actually stored so the assertion is exercised.
	visitErr := secondReader.VisitStoredFields(docNum, func(field string, value []byte) bool {
		if field == "content" && string(value) != "updated content" {
			t.Errorf("expected content to be 'updated content', got '%s'", string(value))
		}
		return true
	})
	if visitErr != nil {
		t.Fatal(visitErr)
	}

	if closeErr := secondReader.Close(); closeErr != nil {
		t.Fatal(closeErr)
	}
}

0 comments on commit 7db485c

Please sign in to comment.