// File: dictionary.go (package collection; the file imports "sort", which is
// used by the Enumerate methods that follow this section).

// trieNode is one position in the trie that backs a Dictionary. The sequence of
// map keys followed from the root to reach a node spells the prefix that node
// represents.
type trieNode struct {
	// IsWord is true when the prefix ending at this node was added as a word.
	IsWord bool
	// Children maps the next letter to the node for the extended prefix. It
	// stays nil until Add attaches the first child.
	Children map[rune]*trieNode
}

// Population counts the words stored at this node and everywhere beneath it.
//
// Time complexity: O(n) where 'n' is the number of nodes in the subtree.
func (node trieNode) Population() uint {
	var sum uint

	if node.IsWord {
		sum++
	}

	for _, child := range node.Children {
		sum += child.Population()
	}

	return sum
}

// Navigate follows word, one rune at a time, starting from node. It returns the
// node reached after the final rune, or nil when the path does not exist. An
// empty word returns node itself, and calling Navigate on a nil node returns
// nil.
func (node *trieNode) Navigate(word string) *trieNode {
	cursor := node

	// Ranging over the string decodes whole runes, so a multi-byte letter
	// matches the single key that Add stored for it.
	for _, letter := range word {
		if cursor == nil {
			return nil
		}

		next, ok := cursor.Children[letter]
		if !ok {
			return nil
		}
		cursor = next
	}

	return cursor
}

// Dictionary is a list of words. It is implemented as a Trie for memory efficiency.
//
// The zero value is an empty Dictionary that is ready to use. Words are stored
// rune by rune; bytes that are not valid UTF-8 decode to U+FFFD, exactly as a
// `range` loop over the string reports them.
type Dictionary struct {
	root *trieNode // nil until the first Add, and again after Clear
	size int64     // number of distinct words currently stored
}

// Add inserts a word into the dictionary, and returns whether or not that word was a new word.
//
// Time complexity: O(m) where 'm' is the length of word.
func (dict *Dictionary) Add(word string) (wasAdded bool) {
	if dict.root == nil {
		dict.root = &trieNode{}
	}

	cursor := dict.root

	for _, letter := range word {
		// Child maps are allocated lazily so that leaf nodes carry no map.
		if cursor.Children == nil {
			cursor.Children = make(map[rune]*trieNode)
		}

		next, ok := cursor.Children[letter]
		if !ok {
			next = &trieNode{}
			cursor.Children[letter] = next
		}
		cursor = next
	}

	if cursor.IsWord {
		return false
	}

	cursor.IsWord = true
	dict.size++
	return true
}

// Clear removes all items from the dictionary.
func (dict *Dictionary) Clear() {
	dict.root = nil
	dict.size = 0
}

// Contains searches the Dictionary to see if the specified word is present.
//
// Time complexity: O(m) where 'm' is the length of word.
func (dict Dictionary) Contains(word string) bool {
	if dict.root == nil {
		return false
	}

	target := dict.root.Navigate(word)
	return target != nil && target.IsWord
}

// Remove ensures that `word` is not in the Dictionary. Returns whether or not an item was removed.
//
// The empty word is handled like any other word, and removing from a
// Dictionary that has never been added to is a no-op. Nodes that no longer
// lead to any word are pruned all the way back toward the root.
//
// Time complexity: O(m) where 'm' is the length of word.
func (dict *Dictionary) Remove(word string) (wasRemoved bool) {
	if dict.root == nil {
		return false
	}

	// step remembers how a node was reached so it can be unlinked later.
	type step struct {
		parent *trieNode
		letter rune
	}

	// len(word) counts bytes, which is an upper bound on the number of runes.
	path := make([]step, 0, len(word))

	cursor := dict.root
	for _, letter := range word {
		next, ok := cursor.Children[letter]
		if !ok {
			return false
		}
		path = append(path, step{parent: cursor, letter: letter})
		cursor = next
	}

	if !cursor.IsWord {
		// The path exists only as a prefix of longer words.
		return false
	}

	cursor.IsWord = false
	dict.size--

	// Walk back toward the root, unlinking nodes until one is still needed
	// either as a word or as the prefix of another word.
	for i := len(path) - 1; i >= 0; i-- {
		if cursor.IsWord || len(cursor.Children) > 0 {
			break
		}
		delete(path[i].parent.Children, path[i].letter)
		cursor = path[i].parent
	}

	return true
}

// Size reports the number of words there are in the Dictionary.
//
// Time complexity: O(1)
func (dict Dictionary) Size() int64 {
	return dict.size
}

// Enumerate lists each word in the Dictionary alphabetically.
+func (dict Dictionary) Enumerate(cancel <-chan struct{}) Enumerator { + if dict.root == nil { + return Empty.Enumerate(cancel) + } + return dict.root.Enumerate(cancel) +} + +func (node trieNode) Enumerate(cancel <-chan struct{}) Enumerator { + var enumerateHelper func(trieNode, string) + + results := make(chan interface{}) + + enumerateHelper = func(subject trieNode, prefix string) { + if subject.IsWord { + select { + case results <- prefix: + case <-cancel: + return + } + } + + alphabetizedChildren := []rune{} + for letter := range subject.Children { + alphabetizedChildren = append(alphabetizedChildren, letter) + } + sort.Slice(alphabetizedChildren, func(i, j int) bool { + return alphabetizedChildren[i] < alphabetizedChildren[j] + }) + + for _, letter := range alphabetizedChildren { + enumerateHelper(*subject.Children[letter], prefix+string(letter)) + } + } + + go func() { + defer close(results) + enumerateHelper(node, "") + }() + + return results +} diff --git a/dictionary_examples_test.go b/dictionary_examples_test.go new file mode 100644 index 0000000..42917ee --- /dev/null +++ b/dictionary_examples_test.go @@ -0,0 +1,93 @@ +package collection_test + +import ( + "fmt" + "strings" + + "github.com/marstr/collection" +) + +func ExampleDictionary_Add() { + subject := &collection.Dictionary{} + + const example = "hello" + fmt.Println(subject.Contains(example)) + fmt.Println(subject.Size()) + subject.Add(example) + fmt.Println(subject.Contains(example)) + fmt.Println(subject.Size()) + + // Output: + // false + // 0 + // true + // 1 +} + +func ExampleDictionary_Clear() { + subject := &collection.Dictionary{} + + subject.Add("hello") + subject.Add("world") + + fmt.Println(subject.Size()) + fmt.Println(collection.CountAll(subject)) + + subject.Clear() + + fmt.Println(subject.Size()) + fmt.Println(collection.Any(subject)) + + // Output: + // 2 + // 2 + // 0 + // false +} + +func ExampleDictionary_Enumerate() { + subject := &collection.Dictionary{} + subject.Add("world") 
+ subject.Add("hello") + + upperCase := collection.Select(subject, func(x interface{}) interface{} { + return strings.ToUpper(x.(string)) + }) + + for word := range subject.Enumerate(nil) { + fmt.Println(word) + } + + for word := range upperCase.Enumerate(nil) { + fmt.Println(word) + } + + // Output: + // hello + // world + // HELLO + // WORLD +} + +func ExampleDictionary_Remove() { + const world = "world" + subject := &collection.Dictionary{} + subject.Add("hello") + subject.Add(world) + + fmt.Println(subject.Size()) + fmt.Println(collection.CountAll(subject)) + + subject.Remove(world) + + fmt.Println(subject.Size()) + fmt.Println(collection.CountAll(subject)) + fmt.Println(collection.Any(subject)) + + // Output: + // 2 + // 2 + // 1 + // 1 + // true +} \ No newline at end of file diff --git a/dictionary_test.go b/dictionary_test.go new file mode 100644 index 0000000..2072378 --- /dev/null +++ b/dictionary_test.go @@ -0,0 +1,168 @@ +package collection + +import ( + "strings" + "testing" +) + +func TestDictionary_Enumerate(t *testing.T) { + dictSets := [][]string{ + {"alpha", "beta", "charlie"}, + {"also", "always"}, + {"canned", "beans"}, + {"duplicated", "duplicated", "after"}, + } + + for _, ds := range dictSets { + t.Run("", func(t *testing.T) { + subject := Dictionary{} + expected := make(map[string]bool) + added := 0 + for _, entry := range ds { + if subject.Add(entry) { + added++ + } + expected[entry] = false + } + + expectedSize := len(expected) + + if added != expectedSize { + t.Logf("`Add` returned true %d times, expected %d times", added, expectedSize) + t.Fail() + } + + if subjectSize := CountAll(subject); subjectSize != expectedSize { + t.Logf("`CountAll` returned %d elements, expected %d", subjectSize, expectedSize) + t.Fail() + } + + prev := "" + for result := range subject.Enumerate(nil) { + t.Logf(result.(string)) + if alreadySeen, ok := expected[result.(string)]; !ok { + t.Logf("An unadded value was returned") + t.Fail() + } else if alreadySeen { 
+ t.Logf("\"%s\" was duplicated", result.(string)) + t.Fail() + } + + if stringle(result.(string), prev) { + t.Logf("Results \"%s\" and \"%s\" were not alphabetized.", prev, result.(string)) + t.Fail() + } + prev = result.(string) + + expected[result.(string)] = true + } + }) + } +} + +func TestDictionary_Add(t *testing.T) { + subject := Dictionary{} + + subject.Add("word") + + if rootChildrenCount := len(subject.root.Children); rootChildrenCount != 1 { + t.Logf("The root should only have one child, got %d instead.", rootChildrenCount) + t.Fail() + } + + if retreived, ok := subject.root.Children['w']; ok { + leaf := retreived.Navigate("ord") + if leaf == nil { + t.Log("Unable to navigate from `w`") + t.Fail() + } else if !leaf.IsWord { + t.Log("leaf should have been a word") + t.Fail() + } + } else { + t.Log("Root doesn't have child for `w`") + t.Fail() + } +} + +func TestTrieNode_Navigate(t *testing.T) { + leaf := trieNode{ + IsWord: true, + } + subject := trieNode{ + Children: map[rune]*trieNode{ + 'a': &trieNode{ + Children: map[rune]*trieNode{ + 'b': &trieNode{ + Children: map[rune]*trieNode{ + 'c': &leaf, + }, + }, + }, + }, + }, + } + + testCases := []struct { + address string + expected *trieNode + }{ + {"abc", &leaf}, + {"abd", nil}, + {"", &subject}, + {"a", subject.Children['a']}, + } + + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + if result := subject.Navigate(tc.address); result != tc.expected { + t.Logf("got: %v want: %v", result, tc.expected) + t.Fail() + } + }) + } +} + +func Test_stringle(t *testing.T) { + testCases := []struct { + left string + right string + expected bool + }{ + {"a", "b", true}, + {"b", "a", false}, + {"a", "a", true}, + {"alpha", "b", true}, + {"a", "beta", true}, + {"alpha", "alpha", true}, + {"alpha", "alphabet", true}, + {"alphabet", "alpha", false}, + {"", "a", true}, + {"", "", true}, + } + + for _, tc := range testCases { + t.Run(strings.Join([]string{tc.left, tc.right}, ","), func(t *testing.T) { + 
if got := stringle(tc.left, tc.right); got != tc.expected { + t.Logf("got: %v want: %v", got, tc.expected) + t.Fail() + } + }) + } +} + +func stringle(left, right string) bool { + other := []byte(right) + for i, letter := range []byte(left) { + if i >= len(other) { + return false + } + + if letter > other[i] { + return false + } else if letter < other[i] { + break + } + } + return true +}