diff --git a/CHANGELOG.md b/CHANGELOG.md index 78e547c21..c9a1ed078 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +### Improvements + +- Add `Repair013Orphans()` to repair faulty orphans in a database last written to by IAVL 0.13.x + ### Bug Fixes - Remove unnecessary Protobuf dependencies diff --git a/repair.go b/repair.go new file mode 100644 index 000000000..157eb1fc3 --- /dev/null +++ b/repair.go @@ -0,0 +1,65 @@ +package iavl + +import ( + "math" + + "github.com/pkg/errors" + dbm "github.com/tendermint/tm-db" +) + +// Repair013Orphans repairs incorrect orphan entries written by IAVL 0.13 pruning. To use it, close +// a database using IAVL 0.13, make a backup copy, and then run this function before opening the +// database with IAVL 0.14 or later. It returns the number of faulty orphan entries removed. If the +// 0.13 database was written with KeepEvery:1 (the default) or the last version _ever_ saved to the +// tree was a multiple of `KeepEvery` and thus saved to disk, this repair is not necessary. +// +// Note that this cannot be used directly on Cosmos SDK databases, since they store multiple IAVL +// trees in the same underlying database via a prefix scheme. +// +// The pruning functionality enabled with Options.KeepEvery > 1 would write orphans entries to disk +// for versions that should only have been saved in memory, and these orphan entries were clamped +// to the last version persisted to disk instead of the version that generated them (so a delete at +// version 749 might generate an orphan entry ending at version 700 for KeepEvery:100). If the +// database is reopened at the last persisted version and this version is later deleted, the +// orphaned nodes can be deleted prematurely or incorrectly, causing data loss and database +// corruption. +// +// This function removes these incorrect orphan entries by deleting all orphan entries that have a +// to-version equal to or greater than the latest persisted version. 
Correct orphans will never +// have this, since they must have been deleted in a future (non-existent) version for that to be +// the case. +func Repair013Orphans(db dbm.DB) (uint64, error) { + ndb := newNodeDB(db, 0, &Options{Sync: true}) + version := ndb.getLatestVersion() + if version == 0 { + return 0, errors.New("no versions found") + } + + var ( + repaired uint64 + err error + ) + batch := db.NewBatch() + defer batch.Close() + ndb.traverseRange(orphanKeyFormat.Key(version), orphanKeyFormat.Key(math.MaxInt64), func(k, v []byte) { + // Sanity check so we don't remove stuff we shouldn't + var toVersion int64 + orphanKeyFormat.Scan(k, &toVersion) + if toVersion < version { + err = errors.Errorf("Found unexpected orphan with toVersion=%v, lesser than latest version %v", + toVersion, version) + return + } + repaired++ + batch.Delete(k) + }) + if err != nil { + return 0, err + } + err = batch.WriteSync() + if err != nil { + return 0, err + } + + return repaired, nil +} diff --git a/repair_test.go b/repair_test.go new file mode 100644 index 000000000..ec6b598b3 --- /dev/null +++ b/repair_test.go @@ -0,0 +1,191 @@ +package iavl + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + dbm "github.com/tendermint/tm-db" +) + +func TestRepair013Orphans(t *testing.T) { + dir, err := ioutil.TempDir("", "test-iavl-repair") + require.NoError(t, err) + defer os.RemoveAll(dir) + + // There is also 0.13-orphans-v6.db containing a database closed immediately after writing + // version 6, which should not contain any broken orphans. + err = copyDB("testdata/0.13-orphans.db", filepath.Join(dir, "0.13-orphans.db")) + require.NoError(t, err) + + db, err := dbm.NewGoLevelDB("0.13-orphans", dir) + require.NoError(t, err) + + // Repair the database. + repaired, err := Repair013Orphans(db) + require.NoError(t, err) + assert.EqualValues(t, 8, repaired) + + // Load the database. 
+ tree, err := NewMutableTreeWithOpts(db, 0, &Options{Sync: true}) + require.NoError(t, err) + version, err := tree.Load() + require.NoError(t, err) + require.EqualValues(t, 6, version) + + // We now generate two empty versions, and check all persisted versions. + _, version, err = tree.SaveVersion() + require.NoError(t, err) + require.EqualValues(t, 7, version) + _, version, err = tree.SaveVersion() + require.NoError(t, err) + require.EqualValues(t, 8, version) + + // Check all persisted versions. + require.Equal(t, []int{3, 6, 7, 8}, tree.AvailableVersions()) + assertVersion(t, tree, 0) + assertVersion(t, tree, 3) + assertVersion(t, tree, 6) + assertVersion(t, tree, 7) + assertVersion(t, tree, 8) + + // We then delete version 6 (the last persisted one with 0.13). + err = tree.DeleteVersion(6) + require.NoError(t, err) + + // Reading "rm7" (which should not have been deleted now) would panic with a broken database. + _, value := tree.Get([]byte("rm7")) + require.Equal(t, []byte{1}, value) + + // Check all persisted versions. + require.Equal(t, []int{3, 7, 8}, tree.AvailableVersions()) + assertVersion(t, tree, 0) + assertVersion(t, tree, 3) + assertVersion(t, tree, 7) + assertVersion(t, tree, 8) + + // Delete all historical versions, and check the latest. + err = tree.DeleteVersion(3) + require.NoError(t, err) + err = tree.DeleteVersion(7) + require.NoError(t, err) + + require.Equal(t, []int{8}, tree.AvailableVersions()) + assertVersion(t, tree, 0) + assertVersion(t, tree, 8) +} + +// assertVersion checks the given version (or current if 0) against the expected values. 
+func assertVersion(t *testing.T, tree *MutableTree, version int64) { + var err error + itree := tree.ImmutableTree + if version > 0 { + itree, err = tree.GetImmutable(version) + require.NoError(t, err) + } + version = itree.version + + // The "current" value should have the current version for <= 6, then 6 afterwards + _, value := itree.Get([]byte("current")) + if version >= 6 { + require.EqualValues(t, []byte{6}, value) + } else { + require.EqualValues(t, []byte{byte(version)}, value) + } + + // The "addX" entries should exist for 1-6 in the respective versions, and the + // "rmX" entries should have been removed for 1-6 in the respective versions. + for i := byte(1); i < 8; i++ { + _, value = itree.Get([]byte(fmt.Sprintf("add%v", i))) + if i <= 6 && int64(i) <= version { + require.Equal(t, []byte{i}, value) + } else { + require.Nil(t, value) + } + + _, value = itree.Get([]byte(fmt.Sprintf("rm%v", i))) + if i <= 6 && version >= int64(i) { + require.Nil(t, value) + } else { + require.Equal(t, []byte{1}, value) + } + } +} + +// Generate013Orphans generates a GoLevelDB orphan database in testdata/0.13-orphans.db +// for testing Repair013Orphans(). It must be run with IAVL 0.13.x. +/*func TestGenerate013Orphans(t *testing.T) { + err := os.RemoveAll("testdata/0.13-orphans.db") + require.NoError(t, err) + db, err := dbm.NewGoLevelDB("0.13-orphans", "testdata") + require.NoError(t, err) + tree, err := NewMutableTreeWithOpts(db, dbm.NewMemDB(), 0, &Options{ + KeepEvery: 3, + KeepRecent: 1, + Sync: true, + }) + require.NoError(t, err) + version, err := tree.Load() + require.NoError(t, err) + require.EqualValues(t, 0, version) + + // We generate 8 versions. In each version, we create a "addX" key, delete a "rmX" key, + // and update the "current" key, where "X" is the current version. Values are the version in + // which the key was last set. 
// copyDB makes a shallow copy of the source database directory: every entry directly inside src
// is copied, as a regular file, to an entry of the same name inside dest. dest (including any
// missing parents) is created if necessary. It returns the first error encountered.
func copyDB(src, dest string) error {
	entries, err := ioutil.ReadDir(src)
	if err != nil {
		return err
	}
	if err := os.MkdirAll(dest, 0777); err != nil {
		return err
	}
	for _, entry := range entries {
		name := entry.Name()
		if err := copyDBFile(filepath.Join(src, name), filepath.Join(dest, name)); err != nil {
			return err
		}
	}
	return nil
}

// copyDBFile copies the contents of the file at src into the file at dest, creating or
// truncating dest. Doing the work in its own function means both handles are closed as soon as
// this one file is done, rather than accumulating until the whole directory has been copied.
func copyDBFile(src, dest string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close() // nolint: errcheck

	out, err := os.Create(dest)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close on the written file can mean lost data, so surface it unless an
		// earlier error is already being returned.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(out, in)
	return err
}
b/testdata/0.13-orphans-v6.db/LOG new file mode 100644 index 000000000..f890e80b8 --- /dev/null +++ b/testdata/0.13-orphans-v6.db/LOG @@ -0,0 +1,6 @@ +=============== Jun 25, 2020 (CEST) =============== +14:30:10.673317 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +14:30:10.688689 db@open opening +14:30:10.689548 version@stat F·[] S·0B[] Sc·[] +14:30:10.702481 db@janitor F·2 G·0 +14:30:10.702564 db@open done T·13.82376ms diff --git a/testdata/0.13-orphans-v6.db/MANIFEST-000000 b/testdata/0.13-orphans-v6.db/MANIFEST-000000 new file mode 100644 index 000000000..9d54f6733 Binary files /dev/null and b/testdata/0.13-orphans-v6.db/MANIFEST-000000 differ diff --git a/testdata/0.13-orphans.db/000001.log b/testdata/0.13-orphans.db/000001.log new file mode 100644 index 000000000..95ef16dca Binary files /dev/null and b/testdata/0.13-orphans.db/000001.log differ diff --git a/testdata/0.13-orphans.db/CURRENT b/testdata/0.13-orphans.db/CURRENT new file mode 100644 index 000000000..feda7d6b2 --- /dev/null +++ b/testdata/0.13-orphans.db/CURRENT @@ -0,0 +1 @@ +MANIFEST-000000 diff --git a/testdata/0.13-orphans.db/LOCK b/testdata/0.13-orphans.db/LOCK new file mode 100644 index 000000000..e69de29bb diff --git a/testdata/0.13-orphans.db/LOG b/testdata/0.13-orphans.db/LOG new file mode 100644 index 000000000..711c5a08f --- /dev/null +++ b/testdata/0.13-orphans.db/LOG @@ -0,0 +1,6 @@ +=============== Jun 25, 2020 (CEST) =============== +13:31:22.162368 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +13:31:22.173177 db@open opening +13:31:22.173961 version@stat F·[] S·0B[] Sc·[] +13:31:22.189072 db@janitor F·2 G·0 +13:31:22.189117 db@open done T·15.875399ms diff --git a/testdata/0.13-orphans.db/MANIFEST-000000 b/testdata/0.13-orphans.db/MANIFEST-000000 new file mode 100644 index 000000000..9d54f6733 Binary files /dev/null and 
b/testdata/0.13-orphans.db/MANIFEST-000000 differ