Skip to content

Commit

Permalink
Merge pull request #6485 from dolthub/daylon/ft-merge-fixes
Browse files Browse the repository at this point in the history
Fixed Full-Text merge bug, removed pseudo-index tables from view
  • Loading branch information
Hydrocharged authored Aug 14, 2023
2 parents cda023d + 4b86b73 commit 45ee8f3
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 11 deletions.
3 changes: 3 additions & 0 deletions go/cmd/dolt/commands/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,9 @@ func diffUserTables(queryist cli.Queryist, sqlCtx *sql.Context, dArgs *diffArgs)

doltSchemasChanged := false
for _, delta := range deltas {
if doltdb.IsFullTextTable(delta.TableName) {
continue
}

// Don't print tables if one side of the diff is an ignored table in the working set being added.
if len(delta.FromTableName) == 0 {
Expand Down
8 changes: 8 additions & 0 deletions go/cmd/dolt/commands/indexcmds/ls.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,14 @@ func (cmd LsCmd) Exec(ctx context.Context, commandStr string, args []string, dEn
}
for _, index := range sch.Indexes().AllIndexes() {
output = append(output, fmt.Sprintf(" %s(%s)", index.Name(), strings.Join(index.ColumnNames(), ", ")))
if index.IsFullText() {
props := index.FullTextProperties()
output = append(output, fmt.Sprintf(" %s", props.ConfigTable))
output = append(output, fmt.Sprintf(" %s", props.PositionTable))
output = append(output, fmt.Sprintf(" %s", props.DocCountTable))
output = append(output, fmt.Sprintf(" %s", props.GlobalCountTable))
output = append(output, fmt.Sprintf(" %s", props.RowCountTable))
}
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions go/cmd/dolt/commands/merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,10 @@ func calculateMergeStats(queryist cli.Queryist, sqlCtx *sql.Context, mergeStats
var allUnmodified = true
// get table operations
for _, summary := range diffSummaries {
// We want to ignore all statistics for Full-Text tables
if doltdb.IsFullTextTable(summary.TableName) {
continue
}
if summary.DiffType == "added" {
allUnmodified = false
mergeStats[summary.TableName] = &merge.MergeStats{
Expand Down
1 change: 1 addition & 0 deletions go/cmd/dolt/commands/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ func createPrintData(err error, queryist cli.Queryist, sqlCtx *sql.Context, show
}
shouldIgnoreTable = ignored == doltdb.Ignore
}
shouldIgnoreTable = shouldIgnoreTable || doltdb.IsFullTextTable(tableName)

switch status {
case "renamed":
Expand Down
32 changes: 32 additions & 0 deletions go/libraries/doltcore/env/actions/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ package actions
import (
"context"

"github.com/dolthub/dolt/go/libraries/doltcore/schema"

"github.com/dolthub/dolt/go/libraries/doltcore/diff"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/utils/set"
Expand All @@ -37,6 +39,36 @@ func MoveTablesBetweenRoots(ctx context.Context, tbls []string, src, dest *doltd
return nil, err
}

// We want to include all Full-Text tables for every move
for _, td := range tblDeltas {
var ftIndexes []schema.Index
if tblSet.Contains(td.ToName) && td.ToSch.Indexes().ContainsFullTextIndex() {
for _, idx := range td.ToSch.Indexes().AllIndexes() {
if !idx.IsFullText() {
continue
}
ftIndexes = append(ftIndexes, idx)
}
} else if tblSet.Contains(td.FromName) && td.FromSch.Indexes().ContainsFullTextIndex() {
for _, idx := range td.FromSch.Indexes().AllIndexes() {
if !idx.IsFullText() {
continue
}
ftIndexes = append(ftIndexes, idx)
}
}
for _, ftIndex := range ftIndexes {
props := ftIndex.FullTextProperties()
tblSet.Add(
props.ConfigTable,
props.PositionTable,
props.DocCountTable,
props.GlobalCountTable,
props.RowCountTable,
)
}
}

tblsToDrop := set.NewStrSet(nil)

for _, td := range tblDeltas {
Expand Down
21 changes: 21 additions & 0 deletions go/libraries/doltcore/merge/fulltext_rebuild.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ func rebuildFullTextIndexes(ctx *sql.Context, root *doltdb.RootValue) (*doltdb.R
if err != nil {
return nil, err
}
// Create a set that we'll check later to remove any orphaned pseudo-index tables.
// These may appear when a table is renamed on another branch and the index was recreated before merging.
foundTables := make(map[string]struct{})
// We'll purge the data from every Full-Text table so that we may rewrite their contents
for _, tblName := range allTableNames {
if !doltdb.IsFullTextTable(tblName) {
Expand Down Expand Up @@ -74,6 +77,8 @@ func rebuildFullTextIndexes(ctx *sql.Context, root *doltdb.RootValue) (*doltdb.R
if doltdb.IsFullTextTable(tblName) {
continue
}
// Add this table to the found tables, since it's not a pseudo-index table.
foundTables[tblName] = struct{}{}
tbl, ok, err := root.GetTable(ctx, tblName)
if err != nil {
return nil, err
Expand Down Expand Up @@ -101,6 +106,12 @@ func rebuildFullTextIndexes(ctx *sql.Context, root *doltdb.RootValue) (*doltdb.R
continue
}
props := idx.FullTextProperties()
// Add all of the pseudo-index tables as found tables
foundTables[props.ConfigTable] = struct{}{}
foundTables[props.PositionTable] = struct{}{}
foundTables[props.DocCountTable] = struct{}{}
foundTables[props.GlobalCountTable] = struct{}{}
foundTables[props.RowCountTable] = struct{}{}
// The config table is shared, and it's not written to during this process
if configTable == nil {
configTable, err = createFulltextTable(ctx, props.ConfigTable, root)
Expand Down Expand Up @@ -185,6 +196,16 @@ func rebuildFullTextIndexes(ctx *sql.Context, root *doltdb.RootValue) (*doltdb.R
}
}
}
// Our last loop removes any orphaned pseudo-index tables
for _, tblName := range allTableNames {
if _, found := foundTables[tblName]; found || !doltdb.IsFullTextTable(tblName) {
continue
}
root, err = root.RemoveTables(ctx, true, true, tblName)
if err != nil {
return nil, err
}
}
return root, nil
}

Expand Down
11 changes: 6 additions & 5 deletions go/libraries/doltcore/merge/merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,16 +207,17 @@ func MergeRoots(
for _, tblName := range tblNames {
mergedTable, stats, err := merger.MergeTable(ctx, tblName, opts, mergeOpts)
if err != nil {
// If a Full-Text table was both modified and deleted, then we want to ignore the modification.
// The schema will reflect the deletion, so we need to treat it the same.
// If a Full-Text table was both modified and deleted, then we want to ignore the deletion.
// If there's a true conflict, then the parent table will catch the conflict.
if doltdb.IsFullTextTable(tblName) && errors.Is(ErrTableDeletedAndModified, err) {
stats = &MergeStats{Operation: TableRemoved}
stats = &MergeStats{Operation: TableModified}
} else {
return nil, err
}
}
if doltdb.IsFullTextTable(tblName) && stats.Operation == TableModified {
// For modified tables, we'll be rebuilding them, so we don't need to calculate the merge
if doltdb.IsFullTextTable(tblName) && (stats.Operation == TableModified || stats.Operation == TableRemoved) {
// We handle removal and modification later in the rebuilding process, so we'll skip those.
// We do not handle adding new tables, so we allow that to proceed.
continue
}
if mergedTable.conflict.Count() > 0 {
Expand Down
12 changes: 10 additions & 2 deletions go/libraries/doltcore/sqle/dtables/status_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,23 @@ func newStatusItr(ctx *sql.Context, st *StatusTable) (*StatusItr, error) {

rows := make([]statusTableRow, 0, len(stagedTables)+len(unstagedTables))
for _, td := range stagedTables {
tblName := tableName(td)
if doltdb.IsFullTextTable(tblName) {
continue
}
rows = append(rows, statusTableRow{
tableName: tableName(td),
tableName: tblName,
isStaged: true,
status: statusString(td),
})
}
for _, td := range unstagedTables {
tblName := tableName(td)
if doltdb.IsFullTextTable(tblName) {
continue
}
rows = append(rows, statusTableRow{
tableName: tableName(td),
tableName: tblName,
isStaged: false,
status: statusString(td),
})
Expand Down
141 changes: 137 additions & 4 deletions integration-tests/bats/fulltext.bats
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,27 @@ teardown() {
[[ "$output" =~ "| yzx | 1 |" ]] || false
[[ "$output" =~ "| zyx | 1 |" ]] || false

dolt add -A
# We add only the "test" table to verify that the Full-Text pseudo-index tables are automatically included.
# If they were not, then our later merge would produce incorrect results.
dolt add test
dolt commit -m "Initial commit"
dolt branch other

dolt sql -q "DELETE FROM test WHERE pk = 3;"
dolt add -A
dolt add test
dolt commit -m "Main commit"

dolt checkout other
dolt sql -q "INSERT INTO test VALUES (6, 'jak', 'mno'), (7, 'mno', 'bot');"
dolt add -A
dolt add test
dolt commit -m "Other commit"

dolt checkout main
dolt merge other
# Check that we don't output stats for the pseudo-index tables
run dolt merge other
[ "$status" -eq 0 ]
[[ ! "$output" =~ "dolt_" ]] || false
[[ "$output" =~ "1 tables changed" ]] || false
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_global_count;"
[[ "$output" =~ "| word | global_count |" ]] || false
[[ "$output" =~ "| abc | 1 |" ]] || false
Expand Down Expand Up @@ -222,3 +228,130 @@ teardown() {
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_row_count;"
[ "$status" -eq 0 ]
}

@test "fulltext: merge with renamed pseudo-index tables on main branch" {
dolt sql -q "CREATE TABLE test (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), FULLTEXT idx (v1));"
dolt sql -q "INSERT INTO test VALUES (1, 'abc');"
dolt add -A
dolt commit -m "Initial commit"
dolt branch other
dolt sql -q "DROP INDEX idx ON test;"
dolt sql -q "INSERT INTO test VALUES (2, 'def');"
dolt sql -q "RENAME TABLE test TO test_temp;"
dolt sql -q "ALTER TABLE test_temp ADD FULLTEXT INDEX idx (v1);"
dolt sql -q "RENAME TABLE test_temp TO test;"
dolt add -A
dolt commit -m "Renamed pseudo-index tables"

dolt checkout other
dolt sql -q "INSERT INTO test VALUES (3, 'ghi');"
dolt add -A
dolt commit -m "Insertion commit"

dolt checkout main
dolt merge other
# Verify that we retain the main branch's pseudo-index tables
run dolt sql -q "SELECT * FROM dolt_test_fts_config"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_doc_count"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_global_count"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_position"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_row_count"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_fts_config"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_idx_0_fts_doc_count"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_idx_0_fts_global_count"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_idx_0_fts_position"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_idx_0_fts_row_count"
[ "$status" -eq 0 ]

run dolt sql -q "SELECT v1 FROM test WHERE MATCH(v1) AGAINST ('abc def ghi');" -r=json
[ "$status" -eq 0 ]
[[ "$output" =~ "{\"rows\": [{\"v1\":\"abc\"},{\"v1\":\"def\"},{\"v1\":\"ghi\"}]}" ]] || false
}

@test "fulltext: merge with renamed pseudo-index tables on other branch" {
dolt sql -q "CREATE TABLE test (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), FULLTEXT idx (v1));"
dolt sql -q "INSERT INTO test VALUES (1, 'abc');"
dolt add -A
dolt commit -m "Initial commit"
dolt branch other
dolt sql -q "INSERT INTO test VALUES (2, 'def');"
dolt add -A
dolt commit -m "Insertion commit"

dolt checkout other
dolt sql -q "DROP INDEX idx ON test;"
dolt sql -q "INSERT INTO test VALUES (3, 'ghi');"
dolt sql -q "RENAME TABLE test TO test_temp;"
dolt sql -q "ALTER TABLE test_temp ADD FULLTEXT INDEX idx (v1);"
dolt sql -q "RENAME TABLE test_temp TO test;"
dolt add -A
dolt commit -m "Renamed pseudo-index tables"

dolt checkout main
dolt merge other
# Verify that we retain the main branch's pseudo-index tables
run dolt sql -q "SELECT * FROM dolt_test_fts_config"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_doc_count"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_global_count"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_position"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_idx_0_fts_row_count"
[ "$status" -eq 0 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_fts_config"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_idx_0_fts_doc_count"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_idx_0_fts_global_count"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_idx_0_fts_position"
[ "$status" -eq 1 ]
run dolt sql -q "SELECT * FROM dolt_test_temp_idx_0_fts_row_count"
[ "$status" -eq 1 ]

run dolt sql -q "SELECT v1 FROM test WHERE MATCH(v1) AGAINST ('abc def ghi');" -r=json
[ "$status" -eq 0 ]
[[ "$output" =~ "{\"rows\": [{\"v1\":\"abc\"},{\"v1\":\"def\"},{\"v1\":\"ghi\"}]}" ]] || false
}

@test "fulltext: psuedo-index tables do not show in dolt status" {
dolt sql -q "CREATE TABLE test_abc (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), FULLTEXT idx (v1));"
dolt sql -q "INSERT INTO test_abc VALUES (1, 'abc');"
run dolt status
[ "$status" -eq 0 ]
[[ "$output" =~ "test_abc" ]] || false
[[ ! "$output" =~ "dolt_" ]] || false
run dolt sql -q "SELECT * from dolt_status;"
[ "$status" -eq 0 ]
[[ "$output" =~ "test_abc" ]] || false
[[ ! "$output" =~ "dolt_" ]] || false
}

@test "fulltext: psuedo-index tables do not show in dolt diff" {
dolt sql -q "CREATE TABLE test_abc (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), FULLTEXT idx (v1));"
dolt add -A
dolt commit -m "Initial commit"
dolt sql -q "INSERT INTO test_abc VALUES (1, 'abc');"
dolt add -A
dolt commit -m "Inserted row"

run dolt diff HEAD HEAD~1
[ "$status" -eq 0 ]
[[ "$output" =~ "test_abc" ]] || false
[[ ! "$output" =~ "dolt_" ]] || false
run dolt diff HEAD~1 HEAD
[ "$status" -eq 0 ]
[[ "$output" =~ "test_abc" ]] || false
[[ ! "$output" =~ "dolt_" ]] || false
}

0 comments on commit 45ee8f3

Please sign in to comment.