From b3bb1b46e8f3c1a1f50c6a31ffe2788e0464abcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20S=C4=85gol?= Date: Mon, 18 Dec 2023 08:46:18 +0100 Subject: [PATCH] #13 Add command to remove manuals from index by given constraints This commit adds a new command `docsearch:index:delete`, which allows for the deletion of all or selected manuals from the Elasticsearch index. It accepts the following options: - --manual-slug: The slug of the manual to be removed from the index - --manual-version: The version of the manual to be removed from the index - --manual-type: The type of the manual to be removed from the index - --manual-language: The language of the manual to be removed from the index To execute the command, type: `bin/console docsearch:index:delete` To remove, for example, a specific manual version, type: `bin/console docsearch:index:delete --manual-version=12.4` Multiple options can be combined for more targeted deletions. Resolves issue: #13 --- README.rst | 22 +++ src/Command/IndexCleaner.php | 73 +++++++ src/Dto/Constraints.php | 36 ++++ src/QueryBuilder/ElasticQueryBuilder.php | 34 ++++ src/Repository/ElasticRepository.php | 32 ++- tests/Unit/Command/IndexCleanerTest.php | 74 +++++++ tests/Unit/Dto/ConstraintsTest.php | 73 +++++++ .../QueryBuilder/ElasticQueryBuilderTest.php | 185 ++++++++++++++++++ 8 files changed, 527 insertions(+), 2 deletions(-) create mode 100644 src/Command/IndexCleaner.php create mode 100644 src/Dto/Constraints.php create mode 100644 src/QueryBuilder/ElasticQueryBuilder.php create mode 100644 tests/Unit/Command/IndexCleanerTest.php create mode 100644 tests/Unit/Dto/ConstraintsTest.php create mode 100644 tests/Unit/QueryBuilder/ElasticQueryBuilderTest.php diff --git a/README.rst b/README.rst index 21abc09..d0f6a8d 100644 --- a/README.rst +++ b/README.rst @@ -89,6 +89,28 @@ Removing index to start fresh If you want to start with fresh Elasticsearch index locally, you can use chrome extensions like `Elasticvue` to clear/drop 
Elasticsearch index if necessary. +Removing selected manuals from index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you want to remove selected manuals from the index, you can use Chrome extensions or the command `docsearch:index:delete`, which accepts the following options: + +.. code-block:: bash + + --manual-slug - slug of the manual to remove from index + --manual-version - version of the manual to remove from index + --manual-type - type of the manual to remove from index + --manual-language - language of the manual to remove from index + +Execute it with: + +.. code-block:: bash + + ddev exec ./bin/console docsearch:index:delete --manual-slug= --manual-version=9.5 --manual-type=Extension --manual-language=en-us + +.. note:: + If you set the ``--manual-version`` option, matching manuals are updated by removing the selected + version from their list of versions; a manual is removed entirely only if that was its last remaining version. + Indexing Core changelog ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/Command/IndexCleaner.php b/src/Command/IndexCleaner.php new file mode 100644 index 0000000..cc9f15f --- /dev/null +++ b/src/Command/IndexCleaner.php @@ -0,0 +1,73 @@ +addOption('manual-slug', 'mp', InputArgument::OPTIONAL, 'Manula path', ''); + $this->addOption('manual-version', 'mv', InputArgument::OPTIONAL, 'Manual version', ''); + $this->addOption('manual-type', 'mt', InputArgument::OPTIONAL, 'Manual type', ''); + $this->addOption('manual-language', 'ml', InputArgument::OPTIONAL, 'Manual language', ''); + } + + /** + * @param InputInterface $input + * @param OutputInterface $output + * @return int + * @throws LogicException + * @throws RuntimeException + * @throws \InvalidArgumentException + */ + protected function execute(InputInterface $input, OutputInterface $output): int + { + $timer = new Stopwatch(); + $timer->start('importer'); + + $io = new SymfonyStyle($input, $output); + $io->title('Removing from index documents by provided criteria'); + + $constraints = new Constraints( 
$input->getOption('manual-slug'), + $input->getOption('manual-version'), + $input->getOption('manual-type'), + $input->getOption('manual-language') + ); + + $deletedManualsCount = $this->elasticRepository->deleteByConstraints($constraints); + + $totalTime = $timer->stop('importer'); + $io->info('Finished after ' . $this->formatMilliseconds($totalTime->getDuration()) . '. Total of ' . $deletedManualsCount . ' manuals were removed.'); + + return Command::SUCCESS; + } + + private function formatMilliseconds(int $milliseconds): string + { + $t = intdiv($milliseconds, 1000); + return sprintf('%02d:%02d:%02d', (int)($t / 3600), (int)($t / 60) % 60, $t % 60); + } +} diff --git a/src/Dto/Constraints.php b/src/Dto/Constraints.php new file mode 100644 index 0000000..7d44f5e --- /dev/null +++ b/src/Dto/Constraints.php @@ -0,0 +1,36 @@ +slug; + } + + public function getVersion(): string + { + return $this->version; + } + + public function getType(): string + { + return $this->type; + } + + public function getLanguage(): string + { + return $this->language; + } +} diff --git a/src/QueryBuilder/ElasticQueryBuilder.php b/src/QueryBuilder/ElasticQueryBuilder.php new file mode 100644 index 0000000..6a4fa49 --- /dev/null +++ b/src/QueryBuilder/ElasticQueryBuilder.php @@ -0,0 +1,34 @@ + ['must' => []]]; + + if ($constraints->getSlug() !== '') { + $query['bool']['must'][] = ['match' => ['manual_slug' => $constraints->getSlug()]]; + } + + if ($constraints->getVersion() !== '') { + $query['bool']['must'][] = ['match' => ['manual_version' => $constraints->getVersion()]]; + } + + if ($constraints->getType() !== '') { + $query['bool']['must'][] = ['match' => ['manual_type' => $constraints->getType()]]; + } + + if ($constraints->getLanguage() !== '') { + $query['bool']['must'][] = ['match' => ['manual_language' => $constraints->getLanguage()]]; + } + + return new Query(['query' => $query]); + } +} diff --git a/src/Repository/ElasticRepository.php b/src/Repository/ElasticRepository.php index 
c414cbf..b38cac0 100644 --- a/src/Repository/ElasticRepository.php +++ b/src/Repository/ElasticRepository.php @@ -2,8 +2,10 @@ namespace App\Repository; +use App\Dto\Constraints; use App\Dto\Manual; use App\Dto\SearchDemand; +use App\QueryBuilder\ElasticQueryBuilder; use Elastica\Aggregation\Terms; use Elastica\Client; use Elastica\Exception\InvalidException; @@ -33,7 +35,7 @@ class ElasticRepository private readonly Client $elasticClient; - public function __construct() + public function __construct(private readonly ElasticQueryBuilder $elasticQueryBuilder) { $elasticConfig = $this->getElasticSearchConfig(); @@ -116,6 +118,28 @@ public function deleteByManual(Manual $manual): void $this->elasticIndex->updateByQuery($deleteQuery, $script); } + /** + * Remove the manuals matching the given constraints from the index. + * + * @return int Value of "total" in the Elasticsearch response (with a version set this presumably also counts snippets that were only updated) + */ + public function deleteByConstraints(Constraints $constraints): int + { + $query = $this->elasticQueryBuilder->buildQuery($constraints); + + // With a version constraint only that version is stripped from the matching snippets, so an + // update query is used; its script sets ctx.op to "delete" once a snippet has no version left. + // Compare against '' like ElasticQueryBuilder does, so a falsy version such as "0" is honoured. + if ($constraints->getVersion() !== '') { + $script = new Script($this->getDeleteQueryScript(), ['manual_version' => $constraints->getVersion()], AbstractScript::LANG_PAINLESS); + $response = $this->elasticIndex->updateByQuery($query, $script, ['wait_for_completion' => true]); + } else { + $response = $this->elasticIndex->deleteByQuery($query, ['wait_for_completion' => true]); + } + + return $response->getData()['total']; + } + /** * Provide elasticsearch script which removes version (provided in params) from a snippet * and if this is the last version assigned to snippet, it deletes the snippet from index (by setting ctx.op). 
@@ -126,7 +150,11 @@ protected function getDeleteQueryScript(): string { $script = <<=0; i--) { + if (ctx._source.manual_version[i] == params.manual_version) { + ctx._source.manual_version.remove(i); + } + } } if (ctx._source.manual_version.size() == 0) { ctx.op = "delete"; diff --git a/tests/Unit/Command/IndexCleanerTest.php b/tests/Unit/Command/IndexCleanerTest.php new file mode 100644 index 0000000..af0b19b --- /dev/null +++ b/tests/Unit/Command/IndexCleanerTest.php @@ -0,0 +1,74 @@ + [ + [ + '--manual-slug' => 'm/typo3/reference-coreapi/12.4/en-us', + '--manual-version' => '12.4', + '--manual-type' => 'TYPO3 Manual', + '--manual-language' => 'en-us' + ], + 10 + ], + 'Some options' => [ + [ + '--manual-slug' => 'm/typo3/reference-coreapi/12.4/en-us', + '--manual-type' => 'TYPO3 Manual', + ], + 8 + ], + 'Only slug' => [ + ['--manual-slug' => 'm/typo3/reference-coreapi/12.4/en-us'], + 5 + ], + 'Only version' => [ + ['--manual-version' => '12.4'], + 3 + ], + 'Only type' => [ + ['--manual-type' => 'TYPO3 Manual'], + 2 + ], + 'Only language' => [ + ['--manual-language' => 'en-us'], + 1 + ], + ]; + } + + /** + * @test + * @dataProvider usesConstraintsToPerformDeleteQueryDataProvider + */ + public function usesConstraintsToPerformDeleteQuery(array $options, int $expectedDeletions): void + { + $elasticRepositoryProphecy = $this->prophesize(ElasticRepository::class); + $elasticRepositoryProphecy + ->deleteByConstraints(Argument::type('App\Dto\Constraints')) + ->shouldBeCalledTimes(1) + ->willReturn($expectedDeletions); + + $command = new IndexCleaner($elasticRepositoryProphecy->reveal()); + $commandTester = new CommandTester($command); + $commandTester->execute($options); // option names must be ones the command registers (manual-slug, manual-version, manual-type, manual-language) + $output = $commandTester->getDisplay(true); + + $this->assertSame(0, $commandTester->getStatusCode()); + $this->assertStringContainsString("Total of $expectedDeletions manuals were removed", $output); + } +} diff --git a/tests/Unit/Dto/ConstraintsTest.php b/tests/Unit/Dto/ConstraintsTest.php new file mode 
100644 index 0000000..b29a03d --- /dev/null +++ b/tests/Unit/Dto/ConstraintsTest.php @@ -0,0 +1,73 @@ +assertInstanceOf(Constraints::class, $constraints); + $this->assertSame('', $constraints->getSlug()); + $this->assertSame('', $constraints->getVersion()); + $this->assertSame('', $constraints->getType()); + $this->assertSame('', $constraints->getLanguage()); + } + + /** + * @test + */ + public function canBeInstantiatedWithCustomValues(): void + { + $constraints = new Constraints('m/typo3/reference-coreapi/12.4/en-us', '12.4', 'TYPO3 Manual', 'en-us'); + + $this->assertInstanceOf(Constraints::class, $constraints); + $this->assertSame('m/typo3/reference-coreapi/12.4/en-us', $constraints->getSlug()); + $this->assertSame('12.4', $constraints->getVersion()); + $this->assertSame('TYPO3 Manual', $constraints->getType()); + $this->assertSame('en-us', $constraints->getLanguage()); + } + + /** + * @test + */ + public function getSlugReturnsCorrectValue(): void + { + $constraints = new Constraints('m/typo3/reference-coreapi/12.4/en-us'); + $this->assertSame('m/typo3/reference-coreapi/12.4/en-us', $constraints->getSlug()); + } + + /** + * @test + */ + public function getVersionReturnsCorrectValue(): void + { + $constraints = new Constraints('', '12.4'); + $this->assertSame('12.4', $constraints->getVersion()); + } + + /** + * @test + */ + public function getTypeReturnsCorrectValue(): void + { + $constraints = new Constraints('', '', 'TYPO3 Manual'); + $this->assertSame('TYPO3 Manual', $constraints->getType()); + } + + /** + * @test + */ + public function getLanguageReturnsCorrectValue(): void + { + $constraints = new Constraints('', '', '', 'en-us'); + $this->assertSame('en-us', $constraints->getLanguage()); + } +} diff --git a/tests/Unit/QueryBuilder/ElasticQueryBuilderTest.php b/tests/Unit/QueryBuilder/ElasticQueryBuilderTest.php new file mode 100644 index 0000000..9da9255 --- /dev/null +++ b/tests/Unit/QueryBuilder/ElasticQueryBuilderTest.php @@ -0,0 +1,185 @@ 
+buildQuery($constraints); + + $expectedQuery = new Query([ + 'query' => [ + 'bool' => [ + 'must' => [] + ] + ] + ]); + + $this->assertEquals($expectedQuery, $query); + } + + /** + * @test + */ + public function buildQueryWithOnlySlugConstraint(): void + { + $constraints = new Constraints('test-slug'); + $queryBuilder = new ElasticQueryBuilder(); + $query = $queryBuilder->buildQuery($constraints); + + $expectedQuery = new Query([ + 'query' => [ + 'bool' => [ + 'must' => [ + [ + 'match' => [ + 'manual_slug' => 'test-slug' + ] + ] + ] + ] + ] + ]); + + $this->assertEquals($expectedQuery, $query); + } + + /** + * @test + */ + public function buildQueryWithOnlyVersionConstraint(): void + { + $constraints = new Constraints('', '12.04'); + $queryBuilder = new ElasticQueryBuilder(); + $query = $queryBuilder->buildQuery($constraints); + + $expectedQuery = new Query([ + 'query' => [ + 'bool' => [ + 'must' => [ + [ + 'match' => [ + 'manual_version' => '12.04' + ] + ] + ] + ] + ] + ]); + + $this->assertEquals($expectedQuery, $query); + } + + /** + * @test + */ + public function buildQueryWithOnlyTypeConstraint(): void + { + $constraints = new Constraints('', '', 'TYPO3 Manual'); + $queryBuilder = new ElasticQueryBuilder(); + $query = $queryBuilder->buildQuery($constraints); + + $expectedQuery = new Query([ + 'query' => [ + 'bool' => [ + 'must' => [ + [ + 'match' => [ + 'manual_type' => 'TYPO3 Manual' + ] + ] + ] + ] + ] + ]); + + $this->assertEquals($expectedQuery, $query); + } + + /** + * @test + */ + public function buildQueryWithOnlyLanguageConstraint(): void + { + $constraints = new Constraints('', '', '', 'en-us'); + $queryBuilder = new ElasticQueryBuilder(); + $query = $queryBuilder->buildQuery($constraints); + + $expectedQuery = new Query([ + 'query' => [ + 'bool' => [ + 'must' => [ + [ + 'match' => [ + 'manual_language' => 'en-us' + ] + ] + ] + ] + ] + ]); + + $this->assertEquals($expectedQuery, $query); + } + + /** + * @test + */ + public function 
buildQueryWithSomeConstraints(): void + { + $constraints = new Constraints('m/typo3/reference-coreapi/12.4/en-us', '', 'TYPO3 Manual', ''); + $queryBuilder = new ElasticQueryBuilder(); + $query = $queryBuilder->buildQuery($constraints); + + $expectedQuery = new Query([ + 'query' => [ + 'bool' => [ + 'must' => [ + ['match' => ['manual_slug' => 'm/typo3/reference-coreapi/12.4/en-us']], + ['match' => ['manual_type' => 'TYPO3 Manual']], + ] + ] + ] + ]); + + $this->assertEquals($expectedQuery, $query); + } + + /** + * @test + */ + public function buildQueryWithAllConstraints(): void + { + $constraints = new Constraints('m/typo3/reference-coreapi/12.4/en-us', '12.4', 'TYPO3 Manual', 'en-us'); + $queryBuilder = new ElasticQueryBuilder(); + $query = $queryBuilder->buildQuery($constraints); + + $expectedQuery = new Query([ + 'query' => [ + 'bool' => [ + 'must' => [ + ['match' => ['manual_slug' => 'm/typo3/reference-coreapi/12.4/en-us']], + ['match' => ['manual_version' => '12.4']], + ['match' => ['manual_type' => 'TYPO3 Manual']], + ['match' => ['manual_language' => 'en-us']] + ] + ] + ] + ]); + + $this->assertEquals($expectedQuery, $query); + } +} \ No newline at end of file