feat(search): match fuzzy
This ensures we properly handle fuzzy results (again): when providing
multiple search strings, we return results that match on a best-effort
basis, rather than being overly strict:

items = [ 'foo', 'bar' ]
search = 'foo bar'
result = [ 'foo', 'bar' ]

The returned tokens allow folks to make sense of which parts of a result
actually matched, and to refine the search as they see fit.

Along with this, results are still scored, i.e. the most relevant results
are shown to users first.
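
A minimal usage sketch of what this enables (the call shape mirrors the
test suite below; the exact token shape is an assumption derived from
getMatchingTokens in this diff):

```js
// hypothetical invocation, mirroring the test suite below
const results = search(items, 'foo bar', {
  keys: [ 'title' ]
});

// each result carries the matched item plus tokens describing which
// parts of the searched value matched, e.g. for the value 'foo bar':
//
//   { value: 'foo', index: 0, match: true, wordStart: true, start: true }
//   { value: ' ',   index: 3 }
//   { value: 'bar', index: 4, match: true, wordStart: true }
```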

Related to bpmn-io/bpmn-js#2235
nikku committed Oct 31, 2024
1 parent 7b966ce commit 00a219b
Showing 2 changed files with 207 additions and 65 deletions.
lib/features/search/search.js (125 changes: 72 additions & 53 deletions)

```diff
@@ -144,44 +144,46 @@ export function hasMatch(tokens) {
  * @returns {number}
  */
 export function compareTokens(tokensA, tokensB) {
+  return scoreTokens(tokensB) - scoreTokens(tokensA);
+}
 
-  const tokensAHasMatch = tokensA && hasMatch(tokensA),
-        tokensBHasMatch = tokensB && hasMatch(tokensB);
-
-  if (tokensAHasMatch && !tokensBHasMatch) {
-    return -1;
-  }
-
-  if (!tokensAHasMatch && tokensBHasMatch) {
-    return 1;
-  }
+/**
+ * @param { Token[] } tokens
+ * @returns { number }
+ */
+function scoreTokens(tokens) {
+  return tokens.reduce((sum, token) => sum + scoreToken(token), 0);
+}
 
-  if (!tokensAHasMatch && !tokensBHasMatch) {
+/**
+ * Score a token.
+ *
+ * @param { Token } token
+ *
+ * @returns { number }
+ */
+function scoreToken(token) {
+  if (!token.match) {
     return 0;
   }
 
-  const tokensAFirstMatch = tokensA.find(isMatch),
-        tokensBFirstMatch = tokensB.find(isMatch);
-
-  if (tokensAFirstMatch.index < tokensBFirstMatch.index) {
-    return -1;
-  }
-
-  if (tokensAFirstMatch.index > tokensBFirstMatch.index) {
-    return 1;
-  }
+  const modifier = token.start
+    ? 1.37
+    : token.wordStart
+      ? 1.13
+      : 1;
 
-  return 0;
+  return token.value.length * modifier;
 }
 
 /**
  * Compares two strings.
  *
  * @param {string} [a = '']
  * @param {string} [b = '']
  *
  * @returns {number}
  */
 export function compareStrings(a = '', b = '') {
   return a.localeCompare(b);
 }
```
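
To see how the new scoring plays out, here is a small worked example (my
own illustration, not part of the commit): a matched token scores its
length, boosted if it opens the value or a word.

```js
// tokens for the value 'foo bar' matched against the pattern 'foo bar',
// using the Token shape introduced in this diff
const tokens = [
  { value: 'foo', index: 0, match: true, wordStart: true, start: true },
  { value: ' ', index: 3 },
  { value: 'bar', index: 4, match: true, wordStart: true }
];

// scoreToken: 'foo' -> 3 * 1.37 = 4.11  (start of value)
//             ' '   -> 0                (no match)
//             'bar' -> 3 * 1.13 = 3.39  (start of word)
//
// scoreTokens(tokens) === 7.5
```

compareTokens subtracts the totals, so token lists with higher scores sort
first; this is what makes start-of-value hits beat word-start hits, and
word-start hits beat in-word hits.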
```diff
@@ -193,44 +195,61 @@ export function compareStrings(a = '', b = '') {
  * @return {Token[]}
  */
 export function getMatchingTokens(string, pattern) {
-  var tokens = [],
-      originalString = string;
 
   if (!string) {
-    return tokens;
+    return [];
   }
 
-  string = string.toLowerCase();
-  pattern = pattern.toLowerCase();
+  const tokens = [];
+
+  const regexpString = [
+    pattern,
+    ...pattern.split(/\s+/).filter(s => s.length > 1)
+  ].map(escapeRegexp).flatMap(str => [ '(?<wordStart>\\b' + str + ')', str ]).join('|');
+
+  const regexp = new RegExp(regexpString, 'ig');
+
+  let match;
+  let lastIndex = 0;
 
-  var index = string.indexOf(pattern);
+  while ((match = regexp.exec(string))) {
 
-  if (index > -1) {
-    if (index !== 0) {
-      tokens.push({
-        value: originalString.slice(0, index),
-        index: 0
-      });
-    }
+    const [ value ] = match;
+
+    if (match.index > lastIndex) {
+
+      // add previous token (NO match)
+      if (match.index !== 0) {
+        tokens.push({
+          value: string.slice(lastIndex, match.index),
+          index: lastIndex
+        });
+      }
+    }
 
+    // add current token (match)
     tokens.push({
-      value: originalString.slice(index, index + pattern.length),
-      index: index,
-      match: true
+      value,
+      index: match.index,
+      match: true,
+      wordStart: !!match.groups.wordStart,
+      start: match.index === 0
    });
 
-    if (pattern.length + index < string.length) {
-      tokens.push({
-        value: originalString.slice(index + pattern.length),
-        index: index + pattern.length
-      });
-    }
-  } else {
+    lastIndex = match.index + value.length;
+  }
+
+  // add after token (NO match)
+  if (lastIndex < string.length) {
     tokens.push({
-      value: originalString,
-      index: 0
+      value: string.slice(lastIndex),
+      index: lastIndex
     });
   }
 
   return tokens;
 }
+
+function escapeRegexp(string) {
+  return string.replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&');
+}
```
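
For reference, this is the alternation getMatchingTokens builds for the
pattern 'foo bar' (my own trace of the code above; spaces are not in the
escape set, so they pass through unescaped):

```js
// the full pattern is tried first, then every whitespace-separated part
// longer than one character, each as a \b-anchored (word-start) variant
// followed by a plain variant:
const regexpString =
  '(?<wordStart>\\bfoo bar)|foo bar' +
  '|(?<wordStart>\\bfoo)|foo' +
  '|(?<wordStart>\\bbar)|bar';

// note: the repeated (?<wordStart>...) groups across alternatives require
// an engine with duplicate named capture group support
const regexp = new RegExp(regexpString, 'ig');

// exec-ing over 'other bar foo' then yields match tokens for 'bar' and
// 'foo' (both wordStart), with the unmatched stretches 'other ' and ' '
// emitted as plain tokens in between
```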
test/spec/features/search/searchSpec.js (147 changes: 135 additions & 12 deletions)
```diff
@@ -15,6 +15,31 @@ describe('search', function() {
   }));
 
 
+  it('should search simple', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'foo',
+        description: 'woop'
+      },
+      {
+        title: 'foobar'
+      }
+    ];
+
+    const searchItems = (items, term) => search(items, term, {
+      keys: [
+        'title',
+        'description'
+      ]
+    });
+
+    // then
+    expect(searchItems(items, 'foo')).to.have.length(2);
+    expect(searchItems(items, 'bar')).to.have.length(1);
+    expect(searchItems(items, 'other')).to.have.length(0);
+  }));
+
+
   describe('result', function() {
```
```diff
@@ -75,6 +100,8 @@ describe('search', function() {
     }));
 
   });
+
+
   it('should search complex', inject(function(search) {
 
     // given
```
```diff
@@ -156,7 +183,7 @@ describe('search', function() {
   }));
 
 
-  it('should sort by match location', inject(function(search) {
+  it('should prioritize start of word', inject(function(search) {
 
     // given
     const items = [
```
```diff
@@ -185,8 +212,34 @@ describe('search', function() {
     // then
     expect(results).to.have.length(3);
     expect(results[0].item).to.eql(items[1]);
-    expect(results[1].item).to.eql(items[2]);
-    expect(results[2].item).to.eql(items[0]);
+    expect(results[1].item).to.eql(items[0]);
+    expect(results[2].item).to.eql(items[2]);
   }));
 
 
+  it('should prioritize start of term', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'yes barfoo'
+      },
+      {
+        title: 'yes foowoo'
+      }
+    ];
+
+    // when
+    const results = search(items, 'foo', {
+      keys: [
+        'title'
+      ]
+    });
+
+    // then
+    expect(results).to.have.length(2);
+    expect(results[0].item).to.eql(items[1]);
+    expect(results[1].item).to.eql(items[0]);
+  }));
+
+
```
```diff
@@ -288,29 +341,99 @@ describe('search', function() {
   }));
 
 
-  it('should match partial tokens', inject(function(search) {
+  it('should match case insensitive', inject(function(search) {
 
     // given
     const items = [
       {
-        title: 'baz',
-        description: 'baz'
-      },
+        title: 'KAFKAF'
+      }
+    ];
+
+    // when
+    const results = search(items, 'kaf', {
+      keys: [
+        'title'
+      ]
+    });
+
+    // then
+    expect(results).to.have.length(1);
+    expect(results[0].item).to.eql(items[0]);
+  }));
+
+
+  it('should match partial tokens', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'Kafka amess',
+        description: 'Nope'
+      },
       {
-        title: 'Kaboom'
+        title: 'mess'
       }
     ];
 
+    // when
+    const results = search(items, 'Kaf mess', {
+      keys: [
+        'title',
+        'description',
+        'search'
+      ]
+    });
+
+    // then
+    expect(results).to.have.length(2);
+    expect(results[0].item).to.eql(items[0]);
+    expect(results[1].item).to.eql(items[1]);
+  }));
+
+
+  it('should prioritize longest match', inject(function(search) {
+
+    // given
+    const items = [
       {
-        title: 'Kafka message',
-        description: 'Nope'
+        title: 'baz'
+      },
+      {
+        title: 'baba'
       }
     ];
 
     // when
-    const results = search(items, 'Kaf mess', {
+    const results = search(items, 'baz baba', {
+      keys: [
+        'title',
+        'description',
+        'search'
+      ]
+    });
+
+    // then
+    expect(results).to.have.length(2);
+    expect(results[0].item).to.eql(items[1]);
+    expect(results[1].item).to.eql(items[0]);
+  }));
+
+
+  it('should match with spaces', inject(function(search) {
+
+    // given
+    const items = [
+      {
+        title: 'bar foo bar'
+      },
+      {
+        title: 'other bar foo'
+      }
+    ];
+
+    // when
+    const results = search(items, 'foo bar', {
       keys: [
         'title',
         'description',
```
```diff
@@ -320,7 +443,7 @@ describe('search', function() {
 
     // then
     expect(results).to.have.length(2);
-    expect(results[0].item).to.eql(items[3]);
+    expect(results[0].item).to.eql(items[0]);
     expect(results[1].item).to.eql(items[1]);
   }));
 
```
