Skip to content

Commit

Permalink
fix: add back the missing sanitization
Browse files: browse the repository at this point in the history
…because I forgot it was a thing we needed for the strict search.
  • Loading branch information
revam committed Jan 7, 2024
1 parent 5f3affb commit e4e3cba
Showing 1 changed file with 23 additions and 18 deletions.
41 changes: 23 additions & 18 deletions Shoko.Server/Utilities/SeriesSearch.cs
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,15 +40,17 @@ public static string SanitizeFuzzy(string value, bool replaceInvalid)
return value.CompactWhitespaces();
}

/// <summary>
/// Prepares a search string for comparison: underscores and hyphens are
/// replaced with spaces, the result is passed through
/// <c>CompactWhitespaces</c>, and finally lower-cased with the invariant
/// culture so matching is case insensitive.
/// </summary>
/// <param name="value">The raw text to sanitize.</param>
/// <returns>The sanitized, lower-cased text.</returns>
private static string SanitizeSearchInput(this string value)
{
    // Treat '_' and '-' as plain word separators.
    var separated = value.Replace('_', ' ').Replace('-', ' ');

    // Tidy up the whitespace left behind by the replacements.
    var compacted = separated.CompactWhitespaces();

    return compacted.ToLowerInvariant();
}

// This forces ASCII, because it's faster to stop caring whether "ss" and "ß" are the same.
// No, it's not perfect, but it works better for those who just want to do lazy searching.
// NOTE(review): the actual character filtering is done by FilterSearchCharacters, which
// is defined elsewhere — confirm exactly which characters it strips or maps.
private static string ForceASCII(this string value) =>
value.FilterSearchCharacters()
// Treat underscores and hyphens as word separators.
.Replace('_', ' ')
.Replace('-', ' ')
// Presumably collapses the whitespace runs left by the replacements above;
// CompactWhitespaces is defined elsewhere — TODO confirm.
.CompactWhitespaces()
// Case insensitive. We just removed the fancy characters, so Latin
// alphabet lowercase is all we should have.
.ToLowerInvariant();

// Shared SorensenDice string-distance instance; DiceFuzzySearch calls its
// Distance method to score the fuzzy match.
private static readonly IStringDistance DiceSearch = new SorensenDice();
Expand All @@ -58,11 +60,13 @@ public static SearchResult<T> DiceFuzzySearch<T>(string text, string pattern, T
if (string.IsNullOrWhiteSpace(text) || string.IsNullOrWhiteSpace(pattern))
return new();

// Case insensitive.
text = text.ToLowerInvariant();
pattern = pattern.ToLowerInvariant();
// Sanitize inputs before use.
text = text.SanitizeSearchInput();
pattern = pattern.SanitizeSearchInput();
if (string.IsNullOrWhiteSpace(pattern) || string.IsNullOrWhiteSpace(text))
return new();

// Shortcut
// Strict search for any text (e.g. ASCII, Japanese Kanji/Kana, etc.).
var index = text.IndexOf(pattern, StringComparison.Ordinal);
if (index > -1)
return new()
Expand All @@ -74,19 +78,20 @@ public static SearchResult<T> DiceFuzzySearch<T>(string text, string pattern, T
Result = value,
};

var inputString = text.ForceASCII();
var query = pattern.ForceASCII();
if (string.IsNullOrWhiteSpace(query) || string.IsNullOrWhiteSpace(inputString))
// If strict search didn't work, then force ASCII and do a fuzzy search.
text = text.ForceASCII();
pattern = pattern.ForceASCII();
if (string.IsNullOrWhiteSpace(pattern) || string.IsNullOrWhiteSpace(text))
return new();

// always search the longer string for the shorter one
if (query.Length > inputString.Length)
(inputString, query) = (query, inputString);
// Always search the longer string for the shorter one.
if (pattern.Length > text.Length)
(text, pattern) = (pattern, text);

var result = DiceSearch.Distance(inputString, query);
var result = DiceSearch.Distance(text, pattern);

// Don't count an error as liberally when the title is short
if (inputString.Length < 5 && result > 0.5)
// Don't count an error as liberally when the title is short.
if (text.Length < 5 && result > 0.5)
return new();

if (result >= 0.8)
Expand All @@ -95,7 +100,7 @@ public static SearchResult<T> DiceFuzzySearch<T>(string text, string pattern, T
return new()
{
Distance = result,
LengthDifference = Math.Abs(query.Length - inputString.Length),
LengthDifference = Math.Abs(pattern.Length - text.Length),
Match = text,
Result = value,
};
Expand Down

0 comments on commit e4e3cba

Please sign in to comment.