Skip to content

Commit

Permalink
Add Fuzzy Search driver with unicode support
Browse files Browse the repository at this point in the history
  • Loading branch information
TomLingham committed May 14, 2016
1 parent dbbeca8 commit 0732dc7
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 0 deletions.
4 changes: 4 additions & 0 deletions config/searchy.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
'class' => 'TomLingham\Searchy\SearchDrivers\FuzzySearchDriver',
],

'ufuzzy' => [
'class' => 'TomLingham\Searchy\SearchDrivers\FuzzySearchUnicodeDriver',
],

'simple' => [
'class' => 'TomLingham\Searchy\SearchDrivers\SimpleSearchDriver',
],
Expand Down
31 changes: 31 additions & 0 deletions src/Matchers/AcronymUnicodeMatcher.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

namespace TomLingham\Searchy\Matchers;

/**
 * Matches strings for Acronym 'like' matches but does NOT return Studly Case Matches.
 *
 * For example, a search for 'fb' would match 'foo bar' or 'Fred Brown' but not 'FreeBeer'.
 *
 * Class AcronymUnicodeMatcher
 */
class AcronymUnicodeMatcher extends BaseMatcher
{
    /**
     * SQL comparison operator used when the pattern is applied to a column.
     *
     * @var string
     */
    protected $operator = 'LIKE';

    /**
     * Builds the acronym LIKE pattern for a search string.
     *
     * The string is upper-cased as UTF-8, split into characters, and each
     * character is followed by '%'; the pieces are separated by a single
     * space, so 'fb' becomes 'F% B%'.
     *
     * Only letters and digits take part in the pattern (see
     * splitSearchCharacters()). Previously whitespace and punctuation were
     * emitted as tokens too, so a query such as 'f b' produced 'F%  % B%',
     * which requires two consecutive spaces and never matches acronym-style.
     *
     * @param string $searchString
     *
     * @return string
     */
    public function formatSearchString($searchString)
    {
        $characters = $this->splitSearchCharacters(mb_strtoupper($searchString, 'UTF-8'));

        return implode('% ', $characters).'%';
    }

    /**
     * Splits a UTF-8 string into the characters used to build the pattern.
     *
     * @param string $searchString
     *
     * @return string[]
     */
    private function splitSearchCharacters($searchString)
    {
        $matches = [];

        // Letters and digits, each kept together with any combining marks that
        // follow it. Skipping everything else keeps whitespace, punctuation and
        // the LIKE wildcards '%' and '_' out of the generated pattern.
        if (preg_match_all('/[\p{L}\p{N}]\p{M}*/u', $searchString, $matches) > 0) {
            return $matches[0];
        }

        // No letter or digit at all: keep the legacy per-character split so the
        // result for such queries (and for an empty query) is unchanged.
        $matches = [];
        preg_match_all('/./u', $searchString, $matches);

        return isset($matches[0]) ? $matches[0] : [];
    }
}
45 changes: 45 additions & 0 deletions src/Matchers/ConsecutiveCharactersUnicodeMatcher.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?php

namespace TomLingham\Searchy\Matchers;

/**
 * Matches strings that include all the characters of the search, in the same relative order, anywhere within the string.
 * It also calculates the percentage of characters in the string that are matched and applies the multiplier accordingly.
 *
 * For example, a search for 'fba' would match 'Foo Bar' or 'Afraid of bats'.
 *
 * Class ConsecutiveCharactersUnicodeMatcher
 */
class ConsecutiveCharactersUnicodeMatcher extends BaseMatcher
{
    /**
     * SQL comparison operator used when the pattern is applied to a column.
     *
     * @var string
     */
    protected $operator = 'LIKE';

    /**
     * Builds the LIKE pattern for a search string.
     *
     * The string is split into UTF-8 characters which are joined and wrapped
     * with '%', so 'fba' becomes '%f%b%a%'.
     *
     * Only letters and digits take part in the pattern (see
     * splitSearchCharacters()). buildQueryString() removes '.' from the column
     * before comparing, so a pattern that still contained the dots typed by
     * the user (e.g. 'J.R.R') could never match; dropping punctuation from the
     * pattern makes both sides of the comparison agree.
     *
     * @param string $searchString
     *
     * @return string
     */
    public function formatSearchString($searchString)
    {
        return '%'.implode('%', $this->splitSearchCharacters($searchString)).'%';
    }

    /**
     * Builds the SQL expression that scores a column against the search string.
     *
     * When the column (with '.' removed) matches the pattern, the score is the
     * multiplier scaled by the ratio of the search string length to the length
     * of the column with spaces removed, rounded; otherwise the score is 0.
     *
     * NOTE(review): $column and $rawString are interpolated into the SQL as-is;
     * this presumably relies on the caller having escaped them - confirm.
     *
     * @param string $column
     * @param string $rawString
     *
     * @return string
     */
    public function buildQueryString($column, $rawString)
    {
        $searchString = $this->formatSearchString($rawString);

        return "IF( REPLACE($column, '\.', '') {$this->operator} '$searchString', ROUND({$this->multiplier} * ( CHAR_LENGTH( '$rawString' ) / CHAR_LENGTH( REPLACE($column, ' ', '') ))), 0)";
    }

    /**
     * Splits a UTF-8 string into the characters used to build the pattern.
     *
     * @param string $searchString
     *
     * @return string[]
     */
    private function splitSearchCharacters($searchString)
    {
        $matches = [];

        // Letters and digits, each kept together with any combining marks that
        // follow it. Skipping everything else keeps whitespace, punctuation and
        // the LIKE wildcards '%' and '_' out of the generated pattern.
        if (preg_match_all('/[\p{L}\p{N}]\p{M}*/u', $searchString, $matches) > 0) {
            return $matches[0];
        }

        // No letter or digit at all: keep the legacy per-character split so the
        // result for such queries (and for an empty query) is unchanged.
        $matches = [];
        preg_match_all('/./u', $searchString, $matches);

        return isset($matches[0]) ? $matches[0] : [];
    }
}
36 changes: 36 additions & 0 deletions src/Matchers/StudlyCaseUnicodeMatcher.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?php

namespace TomLingham\Searchy\Matchers;

/**
 * Matches Studly Case strings using the first letters of the words only.
 *
 * For example, a search for 'hp' would match 'HtmlServiceProvider' or 'HashParser' but not 'hasProvider'.
 *
 * Class StudlyCaseUnicodeMatcher
 */
class StudlyCaseUnicodeMatcher extends BaseMatcher
{
    /**
     * SQL comparison operator; the BINARY form compares byte by byte, which
     * makes the upper-cased pattern case-sensitive.
     *
     * @var string
     */
    protected $operator = 'LIKE BINARY';

    /**
     * Builds the Studly Case LIKE pattern for a search string.
     *
     * The string is upper-cased as UTF-8, split into characters, and each
     * character is followed by '%', so 'hp' becomes 'H%P%'.
     *
     * Only letters and digits take part in the pattern (see
     * splitSearchCharacters()). buildQueryString() only scores columns that
     * contain no spaces, so a pattern that kept a space typed by the user
     * would require a space in the column and could never match.
     *
     * @param string $searchString
     *
     * @return string
     */
    public function formatSearchString($searchString)
    {
        $characters = $this->splitSearchCharacters(mb_strtoupper($searchString, 'UTF-8'));

        return implode('%', $characters).'%';
    }

    /**
     * Builds the SQL expression that scores a column against the search string.
     *
     * The multiplier is awarded only when the trimmed column contains no
     * spaces (its length is unchanged by removing them) and the column matches
     * the pattern; otherwise the score is 0.
     *
     * NOTE(review): $column and the search string are interpolated into the
     * SQL as-is; this presumably relies on the caller having escaped them -
     * confirm.
     *
     * @param string $column
     * @param string $searchString
     *
     * @return string
     */
    public function buildQueryString($column, $searchString)
    {
        return "IF( CHAR_LENGTH( TRIM( $column )) = CHAR_LENGTH( REPLACE( TRIM( $column ), ' ', '')) AND $column {$this->operator} '{$this->formatSearchString($searchString)}', {$this->multiplier}, 0)";
    }

    /**
     * Splits a UTF-8 string into the characters used to build the pattern.
     *
     * @param string $searchString
     *
     * @return string[]
     */
    private function splitSearchCharacters($searchString)
    {
        $matches = [];

        // Letters and digits, each kept together with any combining marks that
        // follow it. Skipping everything else keeps whitespace, punctuation and
        // the LIKE wildcards '%' and '_' out of the generated pattern.
        if (preg_match_all('/[\p{L}\p{N}]\p{M}*/u', $searchString, $matches) > 0) {
            return $matches[0];
        }

        // No letter or digit at all: keep the legacy per-character split so the
        // result for such queries (and for an empty query) is unchanged.
        $matches = [];
        preg_match_all('/./u', $searchString, $matches);

        return isset($matches[0]) ? $matches[0] : [];
    }
}
20 changes: 20 additions & 0 deletions src/SearchDrivers/FuzzySearchUnicodeDriver.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php

namespace TomLingham\Searchy\SearchDrivers;

/**
* Search driver that combines the Unicode-aware matcher variants
* (AcronymUnicodeMatcher, ConsecutiveCharactersUnicodeMatcher,
* StudlyCaseUnicodeMatcher) with the other matchers listed below.
*/
class FuzzySearchUnicodeDriver extends BaseSearchDriver
{
/**
* Map of matcher class name => integer weight.
*
* NOTE(review): the weight is presumably handed to each matcher as its
* multiplier by BaseSearchDriver - confirm there.
*
* @var array
*/
protected $matchers = [
\TomLingham\Searchy\Matchers\ExactMatcher::class => 100,
\TomLingham\Searchy\Matchers\StartOfStringMatcher::class => 50,
\TomLingham\Searchy\Matchers\AcronymUnicodeMatcher::class => 42,
\TomLingham\Searchy\Matchers\ConsecutiveCharactersUnicodeMatcher::class => 40,
\TomLingham\Searchy\Matchers\StartOfWordsMatcher::class => 35,
\TomLingham\Searchy\Matchers\StudlyCaseUnicodeMatcher::class => 32,
\TomLingham\Searchy\Matchers\InStringMatcher::class => 30,
\TomLingham\Searchy\Matchers\TimesInStringMatcher::class => 8,
];
}

0 comments on commit 0732dc7

Please sign in to comment.