Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add ranges support #14

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions lib/Languages/Galach/Generators/Native/Range.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
<?php

namespace QueryTranslator\Languages\Galach\Generators\Native;

use LogicException;
use QueryTranslator\Languages\Galach\Generators\Common\Visitor;
use QueryTranslator\Languages\Galach\Values\Node\Term;
use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken;
use QueryTranslator\Values\Node;

/**
 * Range Node Visitor implementation.
 *
 * Renders a Term node that carries a Range token as a string of the form
 * "<domain>:<open><from> TO <to><close>", where the domain prefix is omitted
 * when the token's domain is an empty string.
 */
final class Range extends Visitor
{
    /**
     * Tells whether this visitor handles the given node.
     *
     * @param Node $node
     *
     * @return bool True only for a Term node whose token is a Range token
     */
    public function accept(Node $node)
    {
        return $node instanceof Term && $node->token instanceof RangeToken;
    }

    /**
     * Generates the string representation of a range term.
     *
     * @param Node $node
     * @param Visitor|null $subVisitor Not used by this implementation
     * @param mixed $options Not used by this implementation
     *
     * @throws LogicException If the node is not a Term node holding a Range token,
     *                        or if the token's start/end type is not supported
     *
     * @return string
     */
    public function visit(Node $node, Visitor $subVisitor = null, $options = null)
    {
        if (!$node instanceof Term) {
            throw new LogicException(
                'Implementation accepts instance of Term Node'
            );
        }

        $token = $node->token;

        if (!$token instanceof RangeToken) {
            throw new LogicException(
                'Implementation accepts instance of Range Token'
            );
        }

        $domainPrefix = '' === $token->domain ? '' : "{$token->domain}:";

        // The start is built before the end, so an unsupported start type is reported first.
        return $domainPrefix .
            $this->buildRangeStart($token) .
            ' TO ' .
            $this->buildRangeEnd($token);
    }

    /**
     * Builds the opening bracket followed by the lower bound.
     *
     * @param RangeToken $token
     *
     * @throws LogicException If the start type is neither inclusive nor exclusive
     *
     * @return string
     */
    private function buildRangeStart(RangeToken $token)
    {
        // Strict comparison: a loosely compared non-string type (e.g. true) must not
        // be silently rendered as an inclusive bracket.
        if (RangeToken::TYPE_INCLUSIVE === $token->startType) {
            return '[' . $token->rangeFrom;
        }

        if (RangeToken::TYPE_EXCLUSIVE === $token->startType) {
            return '{' . $token->rangeFrom;
        }

        throw new LogicException(sprintf('Range start type %s is not supported', $token->startType));
    }

    /**
     * Builds the upper bound followed by the closing bracket.
     *
     * @param RangeToken $token
     *
     * @throws LogicException If the end type is neither inclusive nor exclusive
     *
     * @return string
     */
    private function buildRangeEnd(RangeToken $token)
    {
        // Strict comparison, for the same reason as in buildRangeStart().
        if (RangeToken::TYPE_INCLUSIVE === $token->endType) {
            return $token->rangeTo . ']';
        }

        if (RangeToken::TYPE_EXCLUSIVE === $token->endType) {
            return $token->rangeTo . '}';
        }

        throw new LogicException(sprintf('Range end type %s is not supported', $token->endType));
    }
}
33 changes: 33 additions & 0 deletions lib/Languages/Galach/TokenExtractor/Full.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use QueryTranslator\Languages\Galach\TokenExtractor;
use QueryTranslator\Languages\Galach\Tokenizer;
use QueryTranslator\Languages\Galach\Values\Token\Phrase;
use QueryTranslator\Languages\Galach\Values\Token\Range;
use QueryTranslator\Languages\Galach\Values\Token\Tag;
use QueryTranslator\Languages\Galach\Values\Token\User;
use QueryTranslator\Languages\Galach\Values\Token\Word;
Expand Down Expand Up @@ -35,6 +36,13 @@ final class Full extends TokenExtractor
'/(?<lexeme>(?:(?<marker>(?<!\\\\)\#)(?<tag>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<marker>(?<!\\\\)@)(?<user>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<quote>(?<!\\\\)["])(?<phrase>.*?)(?:(?<!\\\\)(?P=quote)))/Aus' => Tokenizer::TOKEN_TERM,
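// Range term with optional domain prefix: an opening "[" or "{", a lower bound, " TO ", an upper bound, and a closing "]" or "}"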
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?'.
'(?<rangeStartSymbol>[\[\{])'.
'(?<rangeFrom>([a-zA-Z0-9_-]+|\*)|(?<quoteFrom>(?<!\\\\)["]).*(?:(?<!\\\\)(?P=quoteFrom)))'.
' TO '.
'(?<rangeTo>([a-zA-Z0-9_-]+|\*)|(?<quoteTo>(?<!\\\\)["]).*(?:(?<!\\\\)(?P=quoteTo)))'.
'(?<rangeEndSymbol>[\]\}]))/Aus' => Tokenizer::TOKEN_TERM,
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(?<!\\\\)["]|\(|\)|$|\s)/Au' => Tokenizer::TOKEN_TERM,
];

Expand All @@ -48,6 +56,15 @@ protected function createTermToken($position, array $data)
$lexeme = $data['lexeme'];

switch (true) {
case (isset($data['rangeStartSymbol']) && isset($data['rangeEndSymbol'])):
return new Range(
$lexeme,
$position,
$data['domain'],
$data['rangeFrom'], $data['rangeTo'],
$this->getRangeTypeBySymbol($data['rangeStartSymbol']),
$this->getRangeTypeBySymbol($data['rangeEndSymbol'])
);
case isset($data['word']):
return new Word(
$lexeme,
Expand Down Expand Up @@ -85,4 +102,20 @@ protected function createTermToken($position, array $data)

throw new RuntimeException('Could not extract term token from the given data');
}

/**
 * Maps a range delimiter symbol to the corresponding range type.
 *
 * Curly braces ("{" and "}") denote an exclusive bound; every other
 * symbol is treated as an inclusive bound.
 *
 * @param string $symbol the range start/end symbol
 *
 * @return string
 */
protected function getRangeTypeBySymbol($symbol)
{
    $isCurlyBrace = '{' === $symbol || '}' === $symbol;

    return $isCurlyBrace ? Range::TYPE_EXCLUSIVE : Range::TYPE_INCLUSIVE;
}
}
1 change: 1 addition & 0 deletions lib/Languages/Galach/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ final class Tokenizer implements Tokenizing
* @see \QueryTranslator\Languages\Galach\Values\Token\Tag
* @see \QueryTranslator\Languages\Galach\Values\Token\User
* @see \QueryTranslator\Languages\Galach\Values\Token\Word
* @see \QueryTranslator\Languages\Galach\Values\Token\Range
*/
const TOKEN_TERM = 512;

Expand Down
74 changes: 74 additions & 0 deletions lib/Languages/Galach/Values/Token/Range.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php

namespace QueryTranslator\Languages\Galach\Values\Token;

use QueryTranslator\Languages\Galach\Tokenizer;
use QueryTranslator\Values\Token;

/**
 * Range term token.
 *
 * Holds the bounds of a range together with the type (inclusive or exclusive)
 * of each bound, and an optional domain.
 *
 * @see \QueryTranslator\Languages\Galach\Tokenizer::TOKEN_TERM
 */
final class Range extends Token
{
    const TYPE_INCLUSIVE = 'inclusive';
    const TYPE_EXCLUSIVE = 'exclusive';

    /**
     * Holds domain string.
     *
     * @var string
     */
    public $domain;

    /**
     * Holds the lower bound of the range.
     *
     * @var string
     */
    public $rangeFrom;

    /**
     * Holds the upper bound of the range.
     *
     * @var string
     */
    public $rangeTo;

    /**
     * Holds the type of the lower bound, one of the TYPE_* constants.
     *
     * @var string
     */
    public $startType;

    /**
     * Holds the type of the upper bound, one of the TYPE_* constants.
     *
     * @var string
     */
    public $endType;

    /**
     * @param string $lexeme
     * @param int $position
     * @param string $domain
     * @param string $rangeFrom
     * @param string $rangeTo
     * @param string $startType One of the TYPE_* constants
     * @param string $endType One of the TYPE_* constants
     *
     * @throws \InvalidArgumentException If $startType or $endType is not one of the TYPE_* constants
     */
    public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $startType, $endType)
    {
        // Validate both types up front so that an invalid token is never constructed.
        $this->ensureValidType($startType);
        $this->ensureValidType($endType);

        parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position);

        $this->domain = $domain;
        $this->rangeFrom = $rangeFrom;
        $this->rangeTo = $rangeTo;
        $this->startType = $startType;
        $this->endType = $endType;
    }

    /**
     * Ensures that the given value is one of the supported range types.
     *
     * @param mixed $type
     *
     * @throws \InvalidArgumentException If the type is not one of the TYPE_* constants
     */
    private function ensureValidType($type)
    {
        // Strict matching: with loose comparison a value such as boolean true
        // would be accepted as a valid type.
        if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE], true)) {
            throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type));
        }
    }
}
107 changes: 107 additions & 0 deletions tests/Galach/Generators/Native/RangeTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<?php

namespace QueryTranslator\Tests\Galach\Generators\Native;

use PHPUnit\Framework\TestCase;
use QueryTranslator\Languages\Galach\Generators\Common\Visitor;
use QueryTranslator\Languages\Galach\Generators\Native\Range;
use QueryTranslator\Languages\Galach\Values\Node\Mandatory;
use QueryTranslator\Languages\Galach\Values\Node\Term;
use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken;
use QueryTranslator\Languages\Galach\Values\Token\Word;
use QueryTranslator\Values\Node;

/**
 * Tests the native Range visitor: which nodes it accepts, the strings it
 * generates, and the failures raised for unsupported input.
 */
class RangeTest extends TestCase
{
    /**
     * @var Visitor
     */
    public $visitor;

    protected function setUp()
    {
        $this->visitor = new Range();
    }

    /**
     * Provides pairs of expected accept() result and the node to check.
     *
     * @return array
     */
    public function acceptDataprovider()
    {
        return [
            [true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))],
            [false, new Term(new Word('word', 0, '', 'a'))],
        ];
    }

    /**
     * @param bool $expected
     * @param Node $node
     *
     * @dataProvider acceptDataprovider
     */
    public function testAccepts($expected, $node)
    {
        $this->assertSame($expected, $this->visitor->accept($node));
    }

    /**
     * Provides pairs of expected visit() output and the node to visit.
     *
     * @return array
     */
    public function visitDataprovider()
    {
        return [
            ['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))],
            ['[a TO b}', new Term(new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'))],
            ['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'))],
            ['{a TO b]', new Term(new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'))],
            // A non-empty domain must be rendered as a "domain:" prefix.
            [
                'field:[a TO b]',
                new Term(new RangeToken('field:[a TO b]', 0, 'field', 'a', 'b', 'inclusive', 'inclusive')),
            ],
        ];
    }

    /**
     * @param string $expected
     * @param Node $node
     *
     * @dataProvider visitDataprovider
     */
    public function testVisit($expected, $node)
    {
        $this->assertSame($expected, $this->visitor->visit($node));
    }

    /**
     * Provides nodes that visit() must reject: a non-Term node and a Term
     * node holding a non-Range token.
     *
     * @return array
     */
    public function visitWrongNodeDataprovider()
    {
        return [
            [new Mandatory()],
            [new Term(new Word('word', 0, '', 'a'))],
        ];
    }

    /**
     * @param Node $node
     *
     * @dataProvider visitWrongNodeDataprovider
     */
    public function testVisitWrongNodeFails($node)
    {
        $this->expectException(\LogicException::class);
        $this->visitor->visit($node);
    }

    public function testVisitUnknownRangeStartTypeFails()
    {
        // The token constructor validates types, so the invalid type is set afterwards.
        $token = new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive');
        $token->startType = 'unknown';
        $node = new Term($token);

        $this->expectException(\LogicException::class);
        $this->expectExceptionMessage('Range start type unknown is not supported');
        $this->visitor->visit($node);
    }

    public function testVisitUnknownRangeEndTypeFails()
    {
        // The token constructor validates types, so the invalid type is set afterwards.
        $token = new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive');
        $token->endType = 'unknown';
        $node = new Term($token);

        $this->expectException(\LogicException::class);
        $this->expectExceptionMessage('Range end type unknown is not supported');
        $this->visitor->visit($node);
    }
}
43 changes: 43 additions & 0 deletions tests/Galach/Tokenizer/FullTokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin as GroupBeginToken;
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin;
use QueryTranslator\Languages\Galach\Values\Token\Phrase as PhraseToken;
use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken;
use QueryTranslator\Languages\Galach\Values\Token\Tag as TagToken;
use QueryTranslator\Languages\Galach\Values\Token\User as UserToken;
use QueryTranslator\Languages\Galach\Values\Token\Word as WordToken;
Expand Down Expand Up @@ -112,6 +113,48 @@ public function providerForTestTokenize()
new WordToken('word\\ word', 0, '', 'word word'),
],
],
[
'[a TO b]',
[
new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'),
],
],
[
'[a TO b}',
[
new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'),
],
],
[
'{a TO b}',
[
new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'),
],
],
[
'{a TO b]',
[
new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'),
],
],
[
'[2017-01-01 TO 2017-01-05]',
[
new RangeToken('[2017-01-01 TO 2017-01-05]', 0, '', '2017-01-01', '2017-01-05', 'inclusive', 'inclusive'),
],
],
[
'[20 TO *]',
[
new RangeToken('[20 TO *]', 0, '', '20', '*', 'inclusive', 'inclusive'),
],
],
[
'[* TO 20]',
[
new RangeToken('[* TO 20]', 0, '', '*', '20', 'inclusive', 'inclusive'),
],
],
[
'"phrase"',
[
Expand Down
Loading