Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Use specific event to update dataset in configuration process
Browse files Browse the repository at this point in the history
  • Loading branch information
joskfg authored Oct 10, 2018
1 parent 6cbda18 commit de45387
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 21 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,14 @@ $event->scrapeRequest->url // Url scraped
$event->scrapeRequest->type // Request type
```

## Advanced usage

There is another event named `ConfigurationScraped` that is triggered when a scrape is done automatically in the
reconfiguration step. It is exactly the same as the `Scraped` event. It has a different name because it is usually
only of interest internally, where it is used to update the dataset.

`ConfigurationScraped` can be used to do updates or to know internals about the configuration process.

### Queue workers

You need two workers, one for the default queue and another for the `configure` queue. The `configure` worker
Expand Down
17 changes: 10 additions & 7 deletions src/Scraper/Application/Configurator.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
use Goutte\Client;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Log;
use Softonic\LaravelIntelligentScraper\Scraper\Events\ConfigurationScraped;
use Softonic\LaravelIntelligentScraper\Scraper\Events\ScrapeRequest;
use Softonic\LaravelIntelligentScraper\Scraper\Exceptions\ConfigurationException;
use Softonic\LaravelIntelligentScraper\Scraper\Models\Configuration;
use Softonic\LaravelIntelligentScraper\Scraper\Models\ScrapedDataset;
Expand Down Expand Up @@ -114,7 +116,14 @@ private function findConfigByScrapedData($scrapedData, $crawler, $currentConfigu
}
}

$this->updateVariant($scrapedData);
event(new ConfigurationScraped(
new ScrapeRequest(
$scrapedData['url'],
$scrapedData['type']
),
$scrapedData['data'],
$this->variantGenerator->getId($scrapedData['type'])
));

return $result;
}
Expand Down Expand Up @@ -175,10 +184,4 @@ private function checkConfiguration($data, Collection $finalConfig)
throw new ConfigurationException("Field(s) \"{$fieldsMissing}\" not found.", 0);
}
}

/**
 * Store the current variant id on the given scraped data entry and persist it.
 *
 * The variant id is obtained from the variant generator for the entry's type.
 *
 * @param mixed $scrapedData Entry exposing array access ('type', 'variant') and save();
 *                           presumably a ScrapedDataset model — TODO confirm against callers.
 */
private function updateVariant($scrapedData): void
{
// Ask the variant generator for the id associated with this entry's type.
$scrapedData['variant'] = $this->variantGenerator->getId($scrapedData['type']);
// Persist the updated variant on the entry.
$scrapedData->save();
}
}
7 changes: 7 additions & 0 deletions src/Scraper/Events/ConfigurationScraped.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?php

namespace Softonic\LaravelIntelligentScraper\Scraper\Events;

/**
 * Event fired when a scrape is performed automatically during the configuration process.
 *
 * It adds nothing to Scraped; it exists as a separate class so that listeners can
 * subscribe to configuration-time scrapes independently of regular Scraped events.
 */
class ConfigurationScraped extends Scraped
{
}
4 changes: 4 additions & 0 deletions src/ScraperProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Illuminate\Foundation\Support\Providers\EventServiceProvider;
use Softonic\LaravelIntelligentScraper\Scraper\Application\XpathBuilder;
use Softonic\LaravelIntelligentScraper\Scraper\Events\ConfigurationScraped;
use Softonic\LaravelIntelligentScraper\Scraper\Events\InvalidConfiguration;
use Softonic\LaravelIntelligentScraper\Scraper\Events\Scraped;
use Softonic\LaravelIntelligentScraper\Scraper\Events\ScrapeRequest;
Expand All @@ -28,6 +29,9 @@ class ScraperProvider extends EventServiceProvider
Scraped::class => [
UpdateDataset::class,
],
ConfigurationScraped::class => [
UpdateDataset::class,
],
];

/**
Expand Down
26 changes: 12 additions & 14 deletions tests/Unit/Scraper/Application/ConfiguratorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use Illuminate\Foundation\Testing\DatabaseMigrations;
use Illuminate\Support\Facades\Log;
use Mockery\Mock;
use Softonic\LaravelIntelligentScraper\Scraper\Events\ConfigurationScraped;
use Softonic\LaravelIntelligentScraper\Scraper\Exceptions\ConfigurationException;
use Softonic\LaravelIntelligentScraper\Scraper\Models\Configuration as ConfigurationModel;
use Softonic\LaravelIntelligentScraper\Scraper\Models\ScrapedDataset;
Expand Down Expand Up @@ -150,18 +151,18 @@ public function whenTryToFindNewXpathButNotFoundItShouldLogItAndResetVariant()
$this->variantGenerator->shouldReceive('fieldNotFound')
->once();
$this->variantGenerator->shouldReceive('getId')
->andReturnNull();
->andReturn('');

Log::shouldReceive('warning')
->with("Field 'author' with value 'My author' not found for 'https://test.c/123456789012'.");

$this->expectsEvents(ConfigurationScraped::class);

try {
$this->configurator->configureFromDataset($posts);
} catch (ConfigurationException $e) {
$this->assertEquals('Field(s) "author" not found.', $e->getMessage());
}

$this->assertNull($posts[0]['variant']);
}

/**
Expand Down Expand Up @@ -225,18 +226,18 @@ public function whenUseSomeOldXpathButNotFoundNewsItShouldLogItAndResetVariant()
$this->variantGenerator->shouldReceive('fieldNotFound')
->once();
$this->variantGenerator->shouldReceive('getId')
->andReturnNull();
->andReturn('');

Log::shouldReceive('warning')
->with("Field 'author' with value 'My author' not found for 'https://test.c/123456789012'.");

$this->expectsEvents(ConfigurationScraped::class);

try {
$this->configurator->configureFromDataset($posts);
} catch (ConfigurationException $e) {
$this->assertEquals('Field(s) "author" not found.', $e->getMessage());
}

$this->assertNull($posts[0]['variant']);
}

/**
Expand Down Expand Up @@ -306,22 +307,21 @@ public function whenTryToFindXpathInMultiplepostsAndNotFoundInAnyItShouldThrowAn
$this->variantGenerator->shouldReceive('fieldNotFound')
->times(4);
$this->variantGenerator->shouldReceive('getId')
->andReturnNull();
->andReturn('');

Log::shouldReceive('warning')
->with("Field 'title' with value 'My Title' not found for 'https://test.c/123456789012'.");

Log::shouldReceive('warning')
->with("Field 'author' with value 'My author' not found for 'https://test.c/123456789012'.");

$this->expectsEvents(ConfigurationScraped::class);

try {
$this->configurator->configureFromDataset($posts);
} catch (ConfigurationException $e) {
$this->assertEquals('Field(s) "title,author" not found.', $e->getMessage());
}

$this->assertNull($posts[0]['variant']);
$this->assertNull($posts[1]['variant']);
}

/**
Expand Down Expand Up @@ -413,6 +413,8 @@ public function whenDiscoverDifferentXpathItShouldGetAllOfThemAndUpdateTheVarian
$this->variantGenerator->shouldReceive('getId')
->andReturn(10, 20, 30);

$this->expectsEvents(ConfigurationScraped::class);

$configurations = $this->configurator->configureFromDataset($posts);

$this->assertInstanceOf(ConfigurationModel::class, $configurations[0]);
Expand All @@ -436,9 +438,5 @@ public function whenDiscoverDifferentXpathItShouldGetAllOfThemAndUpdateTheVarian
],
array_values($configurations[1]['xpaths'])
);

$this->assertEquals($posts[0]['variant'], 10);
$this->assertEquals($posts[1]['variant'], 20);
$this->assertEquals($posts[2]['variant'], 30);
}
}

0 comments on commit de45387

Please sign in to comment.