From 8d13db12ff16c59bf5a8ee948b850697da052e9f Mon Sep 17 00:00:00 2001
From: George Baev
Date: Wed, 26 Jan 2022 08:41:06 +0200
Subject: [PATCH 1/2] WS2-1205: Add webspark_cas module.

---
 README.md                                     | 13 +++++
 src/EventSubscriber/WebSparkCasSubscriber.php | 52 +++++++++++++++++++
 webspark_cas.info.yml                         |  8 +++
 webspark_cas.services.yml                     |  6 +++
 4 files changed, 79 insertions(+)
 create mode 100644 README.md
 create mode 100644 src/EventSubscriber/WebSparkCasSubscriber.php
 create mode 100644 webspark_cas.info.yml
 create mode 100644 webspark_cas.services.yml

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4819bd6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# WebSpark module
+
+The WebSpark CAS module contains functionality which allows the Elastic Crawler to bypass the CAS redirect and process the web pages successfully.
+
+## Setup
+
+The following setting should be added to the `settings.php` file. If no such setting exists, the default setting will be used.
+
+```php
+$settings['webspark_cas_elastic_crawler_regex'] = '/^Elastic-Crawler .*$/';
+```
+
+This setting describes the RegExp used to determine the `Elastic Crawler` request based on its `User-Agent` HTTP Header value.
diff --git a/src/EventSubscriber/WebSparkCasSubscriber.php b/src/EventSubscriber/WebSparkCasSubscriber.php
new file mode 100644
index 0000000..4279afc
--- /dev/null
+++ b/src/EventSubscriber/WebSparkCasSubscriber.php
@@ -0,0 +1,52 @@
+isElasticCrawlerRequest()) {
+      return;
+    }
+
+    return parent::handle($event);
+  }
+
+  /**
+   * Checks whether the current request's User-Agent matches the crawler regex.
+   *
+   * The header is client-supplied and spoofable, so a match proves nothing.
+   *
+   * @return bool
+   *   TRUE on a match; FALSE for no match or a missing request/agent/pattern.
+   */
+  protected function isElasticCrawlerRequest(): bool {
+    // Pattern comes from settings.php, falling back to the module default.
+    $elasticPattern = Settings::get('webspark_cas_elastic_crawler_regex', '/^Elastic-Crawler .*$/');
+    // Fail closed on an unusable override rather than erroring in preg_match().
+    if (!\is_string($elasticPattern) || $elasticPattern === '') {
+      return FALSE;
+    }
+
+    // The request stack may be empty, in which case there is nothing to match.
+    $current_request = $this->requestStack->getCurrentRequest();
+    $agent = $current_request ? $current_request->server->get('HTTP_USER_AGENT') : NULL;
+    if (empty($agent) || !\is_string($agent)) {
+      return FALSE;
+    }
+
+    // preg_match() returns FALSE for a broken pattern; only 1 is a match.
+    return \preg_match($elasticPattern, $agent) === 1;
+  }
+
+}
diff --git a/webspark_cas.info.yml b/webspark_cas.info.yml
new file mode 100644
index 0000000..f07d5b1
--- /dev/null
+++ b/webspark_cas.info.yml
@@ -0,0 +1,8 @@
+name: WebSpark CAS
+type: module
+description: WebSpark CAS
+package: WebSpark
+core: 8.x
+core_version_requirement: ^8 || ^9
+dependencies:
+  - cas:cas
diff --git a/webspark_cas.services.yml b/webspark_cas.services.yml
new file mode 100644
index 0000000..a4795e3
--- /dev/null
+++ b/webspark_cas.services.yml
@@ -0,0 +1,6 @@
+services:
+  cas.subscriber:
+    class: Drupal\webspark_cas\EventSubscriber\WebSparkCasSubscriber
+    arguments: ['@request_stack', '@current_route_match', '@config.factory', '@current_user', '@plugin.manager.condition', '@cas.helper', '@cas.redirector']
+    tags:
+      - { name: event_subscriber }
From 571d77b94a2ca4b63decf975a0ec8f4b0811dcd2 Mon Sep 17 00:00:00 2001
From: George Baev
Date: Wed, 26 Jan 2022 15:52:23 +0200
Subject: [PATCH 2/2] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4819bd6..9500f0b 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# WebSpark module
+# WebSpark CAS module
 
 The WebSpark CAS module contains functionality which allows the Elastic Crawler to bypass the CAS redirect and process the web pages successfully.
 