diff --git a/lib/Languages/Galach/Generators/Native/Range.php b/lib/Languages/Galach/Generators/Native/Range.php new file mode 100644 index 0000000..73665f3 --- /dev/null +++ b/lib/Languages/Galach/Generators/Native/Range.php @@ -0,0 +1,80 @@ +token instanceof RangeToken; + } + + public function visit(Node $node, Visitor $subVisitor = null, $options = null) + { + if (!$node instanceof Term) { + throw new LogicException( + 'Implementation accepts instance of Term Node' + ); + } + + $token = $node->token; + + if (!$token instanceof RangeToken) { + throw new LogicException( + 'Implementation accepts instance of Range Token' + ); + } + + $domainPrefix = '' === $token->domain ? '' : "{$token->domain}:"; + + return $domainPrefix. + $this->buildRangeStart($token). + ' TO '. + $this->buildRangeEnd($token); + } + + /** + * @param RangeToken $token + * @return string + */ + private function buildRangeStart($token) + { + switch ($token->startType) { + case RangeToken::TYPE_INCLUSIVE: + return '[' . $token->rangeFrom; + + case RangeToken::TYPE_EXCLUSIVE: + return '{' . $token->rangeFrom; + + default: + throw new LogicException(sprintf('Range start type %s is not supported', $token->startType)); + } + } + + /** + * @param RangeToken $token + * @return string + */ + private function buildRangeEnd($token) + { + switch ($token->endType) { + case RangeToken::TYPE_INCLUSIVE: + return $token->rangeTo. ']'; + + case RangeToken::TYPE_EXCLUSIVE: + return $token->rangeTo. '}'; + + default: + throw new LogicException(sprintf('Range end type %s is not supported', $token->endType)); + } + } +} diff --git a/lib/Languages/Galach/TokenExtractor/Full.php b/lib/Languages/Galach/TokenExtractor/Full.php index 6733b39..4e10304 100644 --- a/lib/Languages/Galach/TokenExtractor/Full.php +++ b/lib/Languages/Galach/TokenExtractor/Full.php @@ -5,6 +5,7 @@ use QueryTranslator\Languages\Galach\TokenExtractor; use QueryTranslator\Languages\Galach\Tokenizer; use QueryTranslator\Languages\Galach\Values\Token\Phrase; +use QueryTranslator\Languages\Galach\Values\Token\Range; use QueryTranslator\Languages\Galach\Values\Token\Tag; use QueryTranslator\Languages\Galach\Values\Token\User; use QueryTranslator\Languages\Galach\Values\Token\Word; @@ -35,6 +36,13 @@ final class Full extends TokenExtractor '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?.*?)(?:(? Tokenizer::TOKEN_TERM, + // Handle of range + '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?'. + '(?[\[\{])'. + '(?([a-zA-Z0-9_-]+|\*)|(?(?([a-zA-Z0-9_-]+|\*)|(?(?[\]\}]))/Aus' => Tokenizer::TOKEN_TERM, '/(?(?:(?[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(? Tokenizer::TOKEN_TERM, ]; @@ -48,6 +56,15 @@ protected function createTermToken($position, array $data) $lexeme = $data['lexeme']; switch (true) { + case (isset($data['rangeStartSymbol']) && isset($data['rangeEndSymbol'])): + return new Range( + $lexeme, + $position, + $data['domain'], + $data['rangeFrom'], $data['rangeTo'], + $this->getRangeTypeBySymbol($data['rangeStartSymbol']), + $this->getRangeTypeBySymbol($data['rangeEndSymbol']) + ); case isset($data['word']): return new Word( $lexeme, @@ -85,4 +102,20 @@ protected function createTermToken($position, array $data) throw new RuntimeException('Could not extract term token from the given data'); } + + /** + * Returns the range type, given the symbol. + * + * @param string $symbol the range start/end symbol + * + * @return string + */ + protected function getRangeTypeBySymbol($symbol) + { + if (in_array($symbol, ['{','}'], true)) { + return Range::TYPE_EXCLUSIVE; + } + + return Range::TYPE_INCLUSIVE; + } } diff --git a/lib/Languages/Galach/Tokenizer.php b/lib/Languages/Galach/Tokenizer.php index f87438e..325464e 100644 --- a/lib/Languages/Galach/Tokenizer.php +++ b/lib/Languages/Galach/Tokenizer.php @@ -78,6 +78,7 @@ final class Tokenizer implements Tokenizing * @see \QueryTranslator\Languages\Galach\Values\Token\Tag * @see \QueryTranslator\Languages\Galach\Values\Token\User * @see \QueryTranslator\Languages\Galach\Values\Token\Word + * @see \QueryTranslator\Languages\Galach\Values\Token\Range */ const TOKEN_TERM = 512; diff --git a/lib/Languages/Galach/Values/Token/Range.php b/lib/Languages/Galach/Values/Token/Range.php new file mode 100644 index 0000000..16f5503 --- /dev/null +++ b/lib/Languages/Galach/Values/Token/Range.php @@ -0,0 +1,74 @@ +ensureValidType($startType); + $this->ensureValidType($endType); + + parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position); + + $this->domain = $domain; + $this->rangeFrom = $rangeFrom; + $this->rangeTo = $rangeTo; + $this->startType = $startType; + $this->endType = $endType; + } + + private function ensureValidType($type) + { + if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE])) { + throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type)); + } + } +} diff --git a/tests/Galach/Generators/Native/RangeTest.php b/tests/Galach/Generators/Native/RangeTest.php new file mode 100644 index 0000000..8df8426 --- /dev/null +++ b/tests/Galach/Generators/Native/RangeTest.php @@ -0,0 +1,107 @@ +visitor = new Range(); + } + + public function acceptDataprovider() + { + return [ + [true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))], + [false, new Term(new Word('word', 0, '', 'a'))], + ]; + } + + /** + * @param bool $expected + * @param Node $token + * + * @dataProvider acceptDataprovider + */ + public function testAccepts($expected, $node) + { + $this->assertSame($expected, $this->visitor->accept($node)); + } + + public function visitDataprovider() + { + return [ + ['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))], + ['[a TO b}', new Term(new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'))], + ['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'))], + ['{a TO b]', new Term(new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'))], + ]; + } + + /** + * @param string $expected + * @param Node $token + * + * @dataProvider visitDataprovider + */ + public function testVisit($expected, $node) + { + $this->assertSame($expected, $this->visitor->visit($node)); + } + + public function visitWrongNodeDataprovider() + { + return [ + [new Mandatory()], + [new Term(new Word('word', 0, '', 'a'))], + ]; + } + + /** + * @param string $expected + * @param Node $token + * + * @dataProvider visitWrongNodeDataprovider + */ + public function testVisitWrongNodeFails($node) + { + $this->expectException(\LogicException::class); + $this->visitor->visit($node); + } + + public function testVisitUnknownRangeStartTypeFails() + { + $token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive'); + $token->startType = 'unknown'; + $node = new Term($token); + + $this->expectException(\LogicException::class); + $this->expectExceptionMessage('Range start type unknown is not supported'); + $this->visitor->visit($node); + } + + public function testVisitUnknownRangeEndTypeFails() + { + $token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive'); + $token->endType = 'unknown'; + $node = new Term($token); + + $this->expectException(\LogicException::class); + $this->expectExceptionMessage('Range end type unknown is not supported'); + $this->visitor->visit($node); + } +} diff --git a/tests/Galach/Tokenizer/FullTokenizerTest.php b/tests/Galach/Tokenizer/FullTokenizerTest.php index 55c3dfb..afb6d07 100644 --- a/tests/Galach/Tokenizer/FullTokenizerTest.php +++ b/tests/Galach/Tokenizer/FullTokenizerTest.php @@ -8,6 +8,7 @@ use QueryTranslator\Languages\Galach\Values\Token\GroupBegin as GroupBeginToken; use QueryTranslator\Languages\Galach\Values\Token\GroupBegin; use QueryTranslator\Languages\Galach\Values\Token\Phrase as PhraseToken; +use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken; use QueryTranslator\Languages\Galach\Values\Token\Tag as TagToken; use QueryTranslator\Languages\Galach\Values\Token\User as UserToken; use QueryTranslator\Languages\Galach\Values\Token\Word as WordToken; @@ -112,6 +113,48 @@ public function providerForTestTokenize() new WordToken('word\\ word', 0, '', 'word word'), ], ], + [ + '[a TO b]', + [ + new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'), + ], + ], + [ + '[a TO b}', + [ + new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'), + ], + ], + [ + '{a TO b}', + [ + new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'), + ], + ], + [ + '{a TO b]', + [ + new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'), + ], + ], + [ + '[2017-01-01 TO 2017-01-05]', + [ + new RangeToken('[2017-01-01 TO 2017-01-05]', 0, '', '2017-01-01', '2017-01-05', 'inclusive', 'inclusive'), + ], + ], + [ + '[20 TO *]', + [ + new RangeToken('[20 TO *]', 0, '', '20', '*', 'inclusive', 'inclusive'), + ], + ], + [ + '[* TO 20]', + [ + new RangeToken('[* TO 20]', 0, '', '*', '20', 'inclusive', 'inclusive'), + ], + ], [ '"phrase"', [ diff --git a/tests/Galach/Tokenizer/TextTokenizerTest.php b/tests/Galach/Tokenizer/TextTokenizerTest.php index 8fb2eaa..73174a0 100644 --- a/tests/Galach/Tokenizer/TextTokenizerTest.php +++ b/tests/Galach/Tokenizer/TextTokenizerTest.php @@ -95,6 +95,55 @@ public static function setUpBeforeClass() new WordToken('@user', 0, '', '@user'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 5), ], + '[a TO b]' => [ + new WordToken('[a', 0, '', '[a'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('b]', 6, '', 'b]'), + ], + '[a TO b}' => [ + new WordToken('[a', 0, '', '[a'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('b}', 6, '', 'b}'), + ], + '{a TO b}' => [ + new WordToken('{a', 0, '', '{a'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('b}', 6, '', 'b}'), + ], + '{a TO b]' => [ + new WordToken('{a', 0, '', '{a'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('b]', 6, '', 'b]'), + ], + '[2017-01-01 TO 2017-01-05]' => [ + new WordToken('[2017-01-01', 0, '', '[2017-01-01'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11), + new WordToken('TO', 12, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 14), + new WordToken('2017-01-05]', 15, '', '2017-01-05]'), + ], + '[20 TO *]' => [ + new WordToken('[20', 0, '', '[20'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3), + new WordToken('TO', 4, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6), + new WordToken('*]', 7, '', '*]'), + ], + '[* TO 20]' => [ + new WordToken('[*', 0, '', '[*'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), + new WordToken('TO', 3, '', 'TO'), + new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), + new WordToken('20]', 6, '', '20]'), + ], 'domain:domain:' => [ new WordToken('domain:domain:', 0, '', 'domain:domain:'), ], diff --git a/tests/Galach/Values/Token/RangeTest.php b/tests/Galach/Values/Token/RangeTest.php new file mode 100644 index 0000000..b6b9bfe --- /dev/null +++ b/tests/Galach/Values/Token/RangeTest.php @@ -0,0 +1,38 @@ +expectException(\InvalidArgumentException::class); + new Range('[a TO b]', 0, '', 'a', 'b', $startType, $endType); + } +}