diff --git a/ImageBase.HashBase.UnitTests/HashBaseUnitTests.cs b/ImageBase.HashBase.UnitTests/HashBaseUnitTests.cs new file mode 100644 index 0000000..5ef24d0 --- /dev/null +++ b/ImageBase.HashBase.UnitTests/HashBaseUnitTests.cs @@ -0,0 +1,220 @@ +using System.Collections.Generic; +using System.Linq; +using Xunit; + +namespace ImageBase.HashBase.UnitTests +{ + public class HashBaseUnitTests + { + public static IEnumerable Strings => + new List + { + new object[] + { + new List() + { + new HashItem() { ObjectId = 1, Hash = 1 }, + new HashItem() { ObjectId = 2, Hash = 3 }, + new HashItem() { ObjectId = 3, Hash = 7 }, + new HashItem() { ObjectId = 4, Hash = 37 }, + new HashItem() { ObjectId = 5, Hash = 53 }, + new HashItem() { ObjectId = 6, Hash = 63 }, + new HashItem() { ObjectId = 7, Hash = 127 }, + new HashItem() { ObjectId = 8, Hash = 255 }, + new HashItem() { ObjectId = 9, Hash = 511}, + new HashItem() { ObjectId = 10, Hash = 512}, + new HashItem() { ObjectId = 11, Hash = 500}, + }, + 37, 2, 10,new List() { 1, 3, 4, 5 } + }, + + new object[] + { + new List() + { + new HashItem() { ObjectId = 1, Hash = 1 }, + new HashItem() { ObjectId = 2, Hash = 3 }, + new HashItem() { ObjectId = 3, Hash = 7 }, + new HashItem() { ObjectId = 4, Hash = 37 }, + new HashItem() { ObjectId = 5, Hash = 53 }, + new HashItem() { ObjectId = 6, Hash = 63 }, + new HashItem() { ObjectId = 7, Hash = 127 }, + new HashItem() { ObjectId = 8, Hash = 255 }, + new HashItem() { ObjectId = 9, Hash = 511}, + new HashItem() { ObjectId = 10, Hash = 512}, + new HashItem() { ObjectId = 11, Hash = 500}, + }, + 511, 2, 10,new List() { 7, 8, 9 } }, + + new object[] { + new List() + { + new HashItem() { ObjectId = 1, Hash = 1 }, + new HashItem() { ObjectId = 2, Hash = 3 }, + new HashItem() { ObjectId = 3, Hash = 7 }, + new HashItem() { ObjectId = 4, Hash = 37 }, + new HashItem() { ObjectId = 5, Hash = 53 }, + new HashItem() { ObjectId = 6, Hash = 63 }, + new HashItem() { ObjectId = 7, Hash = 127 }, + new 
HashItem() { ObjectId = 8, Hash = 255 }, + new HashItem() { ObjectId = 9, Hash = 511}, + new HashItem() { ObjectId = 10, Hash = 512}, + new HashItem() { ObjectId = 11, Hash = 500}, + }, + 31, 3, 10,new List() { 2, 3, 5, 6, 7, 8 } }, + + new object[] + { + new List() + { //Hashes + new HashItem() { ObjectId = 1, Hash = 1 }, //1 + new HashItem() { ObjectId = 2, Hash = 3 }, //11 + new HashItem() { ObjectId = 3, Hash = 7 }, //111 + new HashItem() { ObjectId = 4, Hash = 15 }, //1111 + new HashItem() { ObjectId = 5, Hash = 31 }, //11111 + new HashItem() { ObjectId = 6, Hash = 63 }, //111111 + new HashItem() { ObjectId = 7, Hash = 127 }, //1111111 + new HashItem() { ObjectId = 8, Hash = 255 }, //11111111 + new HashItem() { ObjectId = 9, Hash = 511}, //111111111 + new HashItem() { ObjectId = 10, Hash = 1023 },//1111111111 + new HashItem() { ObjectId = 11, Hash = 2047 },//11111111111 + new HashItem() { ObjectId = 12, Hash = 4095 },//111111111111 + new HashItem() { ObjectId = 13, Hash = 8191 },//1111111111111 + }, + 511, 1, 100,new List() { 8, 9, 10 }, + }, + + new object[] + { + new List() + { //Hashes + new HashItem() { ObjectId = 1, Hash = 1 }, //1 + new HashItem() { ObjectId = 2, Hash = 3 }, //11 + new HashItem() { ObjectId = 3, Hash = 7 }, //111 + new HashItem() { ObjectId = 4, Hash = 15 }, //1111 + new HashItem() { ObjectId = 5, Hash = 31 }, //11111 + new HashItem() { ObjectId = 6, Hash = 63 }, //111111 + new HashItem() { ObjectId = 7, Hash = 127 }, //1111111 + new HashItem() { ObjectId = 8, Hash = 255 }, //11111111 + new HashItem() { ObjectId = 9, Hash = 511}, //111111111 + new HashItem() { ObjectId = 10, Hash = 1023 },//1111111111 + new HashItem() { ObjectId = 11, Hash = 2047 },//11111111111 + new HashItem() { ObjectId = 12, Hash = 4095 },//111111111111 + new HashItem() { ObjectId = 13, Hash = 8191 },//1111111111111 + }, + 511, 2, 100,new List() { 7, 8, 9, 10, 11 } + }, + + new object[] + { + new List() + { //Hashes + new HashItem() { ObjectId = 1, Hash = 1 }, //1 + new HashItem() { 
ObjectId = 2, Hash = 3 }, //11 + new HashItem() { ObjectId = 3, Hash = 7 }, //111 + new HashItem() { ObjectId = 4, Hash = 15 }, //1111 + new HashItem() { ObjectId = 5, Hash = 31 }, //11111 + new HashItem() { ObjectId = 6, Hash = 63 }, //111111 + new HashItem() { ObjectId = 7, Hash = 127 }, //1111111 + new HashItem() { ObjectId = 8, Hash = 255 }, //11111111 + new HashItem() { ObjectId = 9, Hash = 511}, //111111111 + new HashItem() { ObjectId = 10, Hash = 1023 },//1111111111 + new HashItem() { ObjectId = 11, Hash = 2047 },//11111111111 + new HashItem() { ObjectId = 12, Hash = 4095 },//111111111111 + new HashItem() { ObjectId = 13, Hash = 8191 },//1111111111111 + }, + 31, 4, 100,new List() { 1, 2, 3, 4, 5, 6, 7, 8, 9 } + }, + + new object[] + { + new List() + { //Hashes + new HashItem() { ObjectId = 1, Hash = 1 }, //1 + new HashItem() { ObjectId = 2, Hash = 3 }, //11 + new HashItem() { ObjectId = 3, Hash = 7 }, //111 + new HashItem() { ObjectId = 4, Hash = 15 }, //1111 + new HashItem() { ObjectId = 5, Hash = 31 }, //11111 + new HashItem() { ObjectId = 6, Hash = 63 }, //111111 + new HashItem() { ObjectId = 7, Hash = 127 }, //1111111 + new HashItem() { ObjectId = 8, Hash = 255 }, //11111111 + new HashItem() { ObjectId = 9, Hash = 511}, //111111111 + new HashItem() { ObjectId = 10, Hash = 1023 },//1111111111 + new HashItem() { ObjectId = 11, Hash = 2047 },//11111111111 + new HashItem() { ObjectId = 12, Hash = 4095 },//111111111111 + new HashItem() { ObjectId = 13, Hash = 8191 },//1111111111111 + }, + 31, 4, 3,new List() { 4, 5, 6 } + }, + + new object[] + { + new List() + { + //Hashes + new HashItem() {ObjectId = 1, Hash = 1}, //1 + new HashItem() {ObjectId = 2, Hash = 3}, //11 + new HashItem() {ObjectId = 3, Hash = 7}, //111 + new HashItem() {ObjectId = 4, Hash = 15}, //1111 + new HashItem() {ObjectId = 5, Hash = 31}, //11111 + new HashItem() {ObjectId = 6, Hash = 63}, //111111 + new HashItem() {ObjectId = 7, Hash = 127}, //1111111 + new HashItem() {ObjectId = 14, Hash = 127}, //1111111 + new 
HashItem() {ObjectId = 15, Hash = 127}, //1111111 + new HashItem() {ObjectId = 16, Hash = 127}, //1111111 + new HashItem() {ObjectId = 17, Hash = 127}, //1111111 + new HashItem() {ObjectId = 8, Hash = 255}, //11111111 + new HashItem() {ObjectId = 9, Hash = 511}, //111111111 + new HashItem() {ObjectId = 10, Hash = 1023}, //1111111111 + new HashItem() {ObjectId = 11, Hash = 2047}, //11111111111 + new HashItem() {ObjectId = 12, Hash = 4095}, //111111111111 + new HashItem() {ObjectId = 13, Hash = 8191}, //1111111111111 + }, + 127, 4, 100, new List() {3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16, 17} + }, + + new object[] + { + new List() + { + //Hashes + new HashItem() {ObjectId = 1, Hash = 1}, //1 + new HashItem() {ObjectId = 2, Hash = 3}, //11 + new HashItem() {ObjectId = 3, Hash = 7}, //111 + new HashItem() {ObjectId = 4, Hash = 15}, //1111 + new HashItem() {ObjectId = 5, Hash = 31}, //11111 + new HashItem() {ObjectId = 6, Hash = 63}, //111111 + new HashItem() {ObjectId = 7, Hash = 127}, //1111111 + new HashItem() {ObjectId = 14, Hash = 127}, //1111111 + new HashItem() {ObjectId = 15, Hash = 127}, //1111111 + new HashItem() {ObjectId = 16, Hash = 127}, //1111111 + new HashItem() {ObjectId = 17, Hash = 127}, //1111111 + new HashItem() {ObjectId = 8, Hash = 255}, //11111111 + new HashItem() {ObjectId = 9, Hash = 511}, //111111111 + new HashItem() {ObjectId = 10, Hash = 1023}, //1111111111 + new HashItem() {ObjectId = 11, Hash = 2047}, //11111111111 + new HashItem() {ObjectId = 12, Hash = 4095}, //111111111111 + new HashItem() {ObjectId = 18, Hash = 4095}, //111111111111 + new HashItem() {ObjectId = 13, Hash = 8191}, //1111111111111 + }, + 8191, 6, 100, new List() {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18} + }, + }; + + [Theory] + [MemberData(nameof(Strings))] + public void CalculatesForSearch(IEnumerable hashesInput, long hashSearch, int searchRadius, int limit, List expectedIDs) + { + //repeat 100 times because the vantage point is chosen at random, so every run may build a different tree + for (int i = 0; i < 100; i++) + { + VPTreeHashBase 
vpTreeHashBase = new VPTreeHashBase(); + vpTreeHashBase.CreateIndex(hashesInput); + + var result = vpTreeHashBase.Search(hashSearch, searchRadius, limit).OrderBy(x => x); + + Assert.Equal(expectedIDs, result); + } + } + } +} \ No newline at end of file diff --git a/ImageBase.HashBase.UnitTests/ImageBase.HashBase.UnitTests.csproj b/ImageBase.HashBase.UnitTests/ImageBase.HashBase.UnitTests.csproj new file mode 100644 index 0000000..d306a09 --- /dev/null +++ b/ImageBase.HashBase.UnitTests/ImageBase.HashBase.UnitTests.csproj @@ -0,0 +1,20 @@ + + + + netcoreapp3.1 + + false + + + + + + + + + + + + + + diff --git a/ImageBase.HashBase/HashComparer.cs b/ImageBase.HashBase/HashComparer.cs new file mode 100644 index 0000000..e9aa898 --- /dev/null +++ b/ImageBase.HashBase/HashComparer.cs @@ -0,0 +1,23 @@ +using System.Collections.Generic; +using ImageBase.Common; + +namespace ImageBase.HashBase +{ + public class HashComparer : IComparer + { + private readonly long vantagePointHash; + + public HashComparer(HashItem vantagePoint) + { + this.vantagePointHash = vantagePoint.Hash; + } + + public int Compare(HashItem x, HashItem y) + { + var result = HammingDistance.Calculate(x.Hash, vantagePointHash) + .CompareTo(HammingDistance.Calculate(y.Hash, vantagePointHash)); + + return result; + } + } +} \ No newline at end of file diff --git a/ImageBase.HashBase/HashItem.cs b/ImageBase.HashBase/HashItem.cs new file mode 100644 index 0000000..0252b5e --- /dev/null +++ b/ImageBase.HashBase/HashItem.cs @@ -0,0 +1,16 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ImageBase.HashBase +{ + public struct HashItem + { + /// <summary> + /// Id of the object in the database. + /// </summary> + public long ObjectId { get; set; } + + public long Hash { get; set; } + } +} diff --git a/ImageBase.HashBase/IHashBase.cs b/ImageBase.HashBase/IHashBase.cs new file mode 100644 index 0000000..d96e428 --- /dev/null +++ b/ImageBase.HashBase/IHashBase.cs @@ -0,0 +1,24 @@ +using System; +using 
System.Collections.Generic; +using System.Text; + +namespace ImageBase.HashBase +{ + interface IHashBase + { + /// <summary> + /// Creates an in-memory VP-Tree for the specified hashes. + /// </summary> + /// <param name="items">Hash items (object id and hash) to index.</param> + void CreateIndex(IEnumerable items); + + /// <summary> + /// Returns the IDs of indexed objects whose hash is within the given distance of the query hash. + /// </summary> + /// <param name="hash">Query hash to search around.</param> + /// <param name="radius">Largest distance from the query hash, as computed by HammingDistance.Calculate, that still counts as a match.</param> + /// <param name="limit">Maximum number of IDs to return.</param> + /// <returns>Object IDs of the matches, ordered by distance to the query hash, at most limit entries.</returns> + IReadOnlyList Search(long hash, int radius, int limit); + } +} diff --git a/ImageBase.HashBase/ImageBase.HashBase.csproj b/ImageBase.HashBase/ImageBase.HashBase.csproj index 9f5c4f4..53a73c7 100644 --- a/ImageBase.HashBase/ImageBase.HashBase.csproj +++ b/ImageBase.HashBase/ImageBase.HashBase.csproj @@ -1,7 +1,11 @@ - + - netstandard2.0 + netstandard2.1 + + + + diff --git a/ImageBase.HashBase/VPTree.cs b/ImageBase.HashBase/VPTree.cs new file mode 100644 index 0000000..d5044d8 --- /dev/null +++ b/ImageBase.HashBase/VPTree.cs @@ -0,0 +1,84 @@ +using ImageBase.Common; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace ImageBase.HashBase +{ + public class VPTree + { + public VPTree(IEnumerable items) + { + itemsList = items.ToList(); + + startIndex = 0; + endIndex = itemsList.Count() - 1; + + CreateNode(); + } + + private VPTree(List items, int startIndex, int endIndex) + { + itemsList = items; + this.startIndex = startIndex; + this.endIndex = endIndex; + + CreateNode(); + } + + /// <summary> + /// The vantage point of this node, picked at random from the node's item range. + /// </summary> + public HashItem VantagePoint { get; private set; } + + /// <summary> + /// Distance from the vantage point to the median item of the range; it separates Inside from Outside. + /// </summary> + public int Radius { get; private set; } + + /// <summary> + /// The left subtree: items sorted up to and including the median; null when that range is empty. + /// </summary> + public VPTree Inside { get; private set; } + + /// <summary> + /// The right subtree: items sorted after the median; null when the node holds a single item. + /// </summary> + public VPTree Outside { get; private set; } + + private readonly int startIndex; + private readonly int endIndex; + private readonly List itemsList; + + private void CreateNode() + { + int childrenCount = endIndex - startIndex; // items in the range other than the vantage point + + int vantagePointIndex = new Random().Next(startIndex, endIndex); // NOTE(review): Random.Next treats its upper bound as exclusive, so the item at endIndex is only picked when startIndex == endIndex; with an empty item list endIndex is -1 and this call throws ArgumentOutOfRangeException + VantagePoint = itemsList[vantagePointIndex]; + + 
SwapItems(itemsList, startIndex, vantagePointIndex); + + if (childrenCount < 1) + return; + + int median = (endIndex + startIndex) / 2; + + itemsList.Sort(startIndex + 1, childrenCount, new HashComparer(VantagePoint)); + + Radius = HammingDistance.Calculate(itemsList[median].Hash, VantagePoint.Hash); + + if (median - startIndex > 0) + Inside = new VPTree(itemsList, startIndex + 1, median); + + Outside = new VPTree(itemsList, median + 1, endIndex); + } + + private void SwapItems(List items, int index1, int index2) + { + var temp = items[index1]; + items[index1] = VantagePoint; + items[index2] = temp; + } + } +} diff --git a/ImageBase.HashBase/VPTreeHashBase.cs b/ImageBase.HashBase/VPTreeHashBase.cs new file mode 100644 index 0000000..35d4eca --- /dev/null +++ b/ImageBase.HashBase/VPTreeHashBase.cs @@ -0,0 +1,73 @@ +using ImageBase.Common; +using System.Collections.Generic; + +namespace ImageBase.HashBase +{ + public class VPTreeHashBase : IHashBase + { + private VPTree root; + + public void CreateIndex(IEnumerable items) + { + root = new VPTree(items); + } + + public IReadOnlyList Search(long hash, int radius, int limit) + { + var allItems = new List(); + + var resultIDs = new List(limit); + + SearchAllIDs(root, allItems, hash, radius); + + allItems.Sort(new HashComparer(new HashItem() { Hash = hash })); + + for (int i = 0; i < limit && i < allItems.Count; i++) + { + resultIDs.Add(allItems[i].ObjectId); + } + + return resultIDs; + } + + private void SearchAllIDs(VPTree node, List resultIDs, long hash, int radius) + { + int centerToPointDistance = HammingDistance.Calculate(node.VantagePoint.Hash, hash); + + if ((centerToPointDistance - radius) > node.Radius) + { + if (node.Outside != null) + { + SearchAllIDs(node.Outside, resultIDs, hash, radius); + } + return; + } + + if ((centerToPointDistance + radius) < node.Radius) + { + if (node.Inside != null) + { + SearchAllIDs(node.Inside, resultIDs, hash, radius); + } + if (centerToPointDistance <= radius) + { + 
resultIDs.Add(node.VantagePoint); + } + return; + } + + if (node.Outside != null) + { + SearchAllIDs(node.Outside, resultIDs, hash, radius); + } + if (node.Inside != null) + { + SearchAllIDs(node.Inside, resultIDs, hash, radius); + } + if (centerToPointDistance <= radius) + { + resultIDs.Add(node.VantagePoint); + } + } + } +} diff --git a/imagebase.sln b/imagebase.sln index 24d2884..4483588 100644 --- a/imagebase.sln +++ b/imagebase.sln @@ -15,7 +15,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ImageBase.Common.UnitTests" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ImageBase.WebApp.UnitTests", "ImageBase.WebApp.UnitTests\ImageBase.WebApp.UnitTests.csproj", "{0B6BEB91-7E92-4848-949E-99633DB4A54B}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ImageBase.ImageHash.UnitTests", "ImageBase.ImageHash.Tests\ImageBase.ImageHash.UnitTests.csproj", "{9B5EFF94-E990-4828-A424-490F31EA3289}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ImageBase.ImageHash.UnitTests", "ImageBase.ImageHash.Tests\ImageBase.ImageHash.UnitTests.csproj", "{9B5EFF94-E990-4828-A424-490F31EA3289}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ImageBase.HashBase.UnitTests", "ImageBase.HashBase.UnitTests\ImageBase.HashBase.UnitTests.csproj", "{34EC7522-24B2-4206-B3C1-DA33B9485F2B}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -51,6 +53,10 @@ Global {9B5EFF94-E990-4828-A424-490F31EA3289}.Debug|Any CPU.Build.0 = Debug|Any CPU {9B5EFF94-E990-4828-A424-490F31EA3289}.Release|Any CPU.ActiveCfg = Release|Any CPU {9B5EFF94-E990-4828-A424-490F31EA3289}.Release|Any CPU.Build.0 = Release|Any CPU + {34EC7522-24B2-4206-B3C1-DA33B9485F2B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {34EC7522-24B2-4206-B3C1-DA33B9485F2B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {34EC7522-24B2-4206-B3C1-DA33B9485F2B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {34EC7522-24B2-4206-B3C1-DA33B9485F2B}.Release|Any CPU.Build.0 = 
Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE