From 7238593a820dc0654489d3dda81673a6c0bba184 Mon Sep 17 00:00:00 2001 From: da3dsoul Date: Wed, 8 Feb 2017 07:24:36 -0500 Subject: [PATCH 1/2] Fix bitap index out of range --- JMMServer/Utils.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/JMMServer/Utils.cs b/JMMServer/Utils.cs index 424be9014..40cc48085 100644 --- a/JMMServer/Utils.cs +++ b/JMMServer/Utils.cs @@ -308,7 +308,7 @@ public static int BitapFuzzySearch32(string text, string pattern, int k, out int int result = -1; int m = query.Length; int[] R; - int[] patternMask = new int[AllowedSearchCharacters.Length+1]; + int[] patternMask = new int[128]; int i, d; dist = k + 1; @@ -376,7 +376,7 @@ public static int BitapFuzzySearch64(string text, string pattern, int k, out int int result = -1; int m = query.Length; ulong[] R; - ulong[] patternMask = new ulong[AllowedSearchCharacters.Length+1]; + ulong[] patternMask = new ulong[128]; int i, d; dist = text.Length; From 19c986318555590f62133b5e2248e1c62b38b199 Mon Sep 17 00:00:00 2001 From: da3dsoul Date: Wed, 8 Feb 2017 15:02:23 -0500 Subject: [PATCH 2/2] [api] Clean up bitap and fix searching with ' ', '-', and '+' --- JMMServer/Utils.cs | 50 ++++++++++------------------------------------ 1 file changed, 11 insertions(+), 39 deletions(-) diff --git a/JMMServer/Utils.cs b/JMMServer/Utils.cs index 40cc48085..14e3cd0cf 100644 --- a/JMMServer/Utils.cs +++ b/JMMServer/Utils.cs @@ -167,29 +167,8 @@ public static int LevenshteinDistance(string s, string t) public static string FilterCharacters(this string value, char[] allowed, bool blacklist = false) { StringBuilder sb = new StringBuilder(value); - int start = 0; - while (start < sb.Length) - { - if (blacklist ^ !allowed.Contains(sb[start])) - start++; - else - break; - } - if (start == sb.Length) - { - sb.Length = 0; - return ""; - } - int end = sb.Length - 1; - while (end >= 0) - { - if (blacklist ^ !allowed.Contains(sb[end])) - end--; - else - break; - } int dest = 0; - for (int i = start; i <= end; i++) + for (int i = 0; i <= sb.Length-1; i++) { if (blacklist ^ allowed.Contains(sb[i])) { @@ -227,42 +206,31 @@ private static void CompactWhitespaces(StringBuilder sb) else break; } - - // if [sb] has only whitespaces, then return empty string - if (start == sb.Length) { sb.Length = 0; return; } - - // set [end] to last not-whitespace char - int end = sb.Length - 1; while (end >= 0) { - if (Char.IsWhiteSpace(sb[end])) + if (char.IsWhiteSpace(sb[end])) end--; else break; } - - // compact string - int dest = 0; bool previousIsWhitespace = false; for (int i = start; i <= end; i++) { - if (Char.IsWhiteSpace(sb[i])) + if (char.IsWhiteSpace(sb[i])) { - if (!previousIsWhitespace) - { - previousIsWhitespace = true; - sb[dest] = ' '; - dest++; - } + if (previousIsWhitespace) continue; + previousIsWhitespace = true; + sb[dest] = ' '; + dest++; } else { @@ -294,6 +262,8 @@ public static int BitapFuzzySearch32(string text, string pattern, int k, out int string query = pattern.FilterCharacters(AllowedSearchCharacters); inputString = inputString.Replace('_', ' ').Replace('-', ' '); query = query.Replace('_', ' ').Replace('-', ' '); + query = query.CompactWhitespaces(); + inputString = inputString.CompactWhitespaces(); // Case insensitive. We just removed the fancy characters, so latin alphabet lowercase is all we should have query = query.ToLowerInvariant(); inputString = inputString.ToLowerInvariant(); @@ -362,6 +332,8 @@ public static int BitapFuzzySearch64(string text, string pattern, int k, out int string query = pattern.FilterCharacters(AllowedSearchCharacters); inputString = inputString.Replace('_', ' ').Replace('-', ' '); query = query.Replace('_', ' ').Replace('-', ' '); + query = query.CompactWhitespaces(); + inputString = inputString.CompactWhitespaces(); // Case insensitive. We just removed the fancy characters, so latin alphabet lowercase is all we should have query = query.ToLowerInvariant(); inputString = inputString.ToLowerInvariant();