diff --git a/diskv.go b/diskv.go
index e729425..9f07b85 100644
--- a/diskv.go
+++ b/diskv.go
@@ -630,6 +630,9 @@ func (d *Diskv) completeFilename(pathKey *PathKey) string {
 // cacheWithLock attempts to cache the given key-value pair in the store's
 // cache. It can fail if the value is larger than the cache's maximum size.
 func (d *Diskv) cacheWithLock(key string, val []byte) error {
+	// If the key already exists, delete it.
+	d.bustCacheWithLock(key)
+
 	valueSize := uint64(len(val))
 	if err := d.ensureCacheSpaceWithLock(valueSize); err != nil {
 		return fmt.Errorf("%s; not caching", err)
diff --git a/issues_test.go b/issues_test.go
index 914269c..5bf9e35 100644
--- a/issues_test.go
+++ b/issues_test.go
@@ -3,6 +3,7 @@ package diskv
 import (
 	"bytes"
 	"io/ioutil"
+	"math/rand"
 	"sync"
 	"testing"
 	"time"
@@ -118,3 +119,73 @@ func TestIssue17(t *testing.T) {
 	close(start)
 	wg.Wait()
 }
+
+// Test for issue #40, where acquiring two stream readers on the same k/v pair
+// caused the value to be written into the cache twice, messing up the
+// size calculations.
+func TestIssue40(t *testing.T) {
+	var (
+		basePath = "test-data"
+	)
+	// Simplest transform function: put all the data files into the base dir.
+	flatTransform := func(s string) []string { return []string{} }
+
+	// Initialize a new diskv store, rooted at "test-data",
+	// with a 100 byte cache.
+	d := New(Options{
+		BasePath:     basePath,
+		Transform:    flatTransform,
+		CacheSizeMax: 100,
+	})
+
+	defer d.EraseAll()
+
+	// Write a 50 byte value, filling the cache half-way
+	k1 := "key1"
+	d1 := make([]byte, 50)
+	rand.Read(d1)
+	d.Write(k1, d1)
+
+	// Get *two* read streams on it. Because the key is not yet in the cache,
+	// and will not be in the cache until a stream is fully read, both
+	// readers use the 'siphon' object, which always writes to the cache
+	// after reading.
+	s1, err := d.ReadStream(k1, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	s2, err := d.ReadStream(k1, false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// When each stream is drained, the underlying siphon will write
+	// the value into the cache's map and increment the cache size.
+	// This means we will have 1 entry in the cache map
+	// ("key1" mapping to a 50 byte slice) but the cache size will be 100,
+	// because the buggy code does not check if an entry already exists
+	// in the map.
+	// s1 drains:
+	//   cache[k] = v
+	//   cacheSize += len(v)
+	// s2 drains:
+	//   cache[k] = v /* overwrites existing */
+	//   cacheSize += len(v) /* blindly adds to the cache size */
+	ioutil.ReadAll(s1)
+	ioutil.ReadAll(s2)
+
+	// Now write a different k/v pair, with a 60 byte array.
+	k2 := "key2"
+	d2 := make([]byte, 60)
+	rand.Read(d2)
+	d.Write(k2, d2)
+	// The act of reading the k/v pair back out causes it to be cached.
+	// Because the cache is only 100 bytes, it needs to delete existing
+	// entries to make room.
+	// If the cache is buggy, it will delete the single 50-byte entry
+	// from the cache map & decrement cacheSize by 50... but because
+	// cacheSize was improperly incremented twice earlier, this will
+	// leave us with no entries in the cacheMap but with cacheSize==50.
+	// Since CacheSizeMax-cacheSize (100-50) is less than 60, there
+	// is no room in the cache for this entry and it panics.
+	d.Read(k2)
+}