Skip to content

Commit

Permalink
Bugfix/update documentation and fix bugs (#7)
Browse files Browse the repository at this point in the history
* Update documentation
* Fix bug when creating scraped datasets manually
* Generate url hash automatically
  • Loading branch information
joskfg authored Mar 28, 2022
1 parent ae3e629 commit 8398524
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 19 deletions.
63 changes: 45 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,21 @@ ScrapedDataset::create([
'url' => 'https://test.c/p/my-objective',
'type' => 'Item-definition-1',
'data' => [
'title' => 'My title',
'body' => 'This is the body content I want to get',
'images' => [
'https://test.c/images/1.jpg',
'https://test.c/images/2.jpg',
'https://test.c/images/3.jpg',
[
'key' => 'title',
'value' => 'My title',
],
[
'key' => 'body',
'value' => 'This is the body content I want to get',
],
[
'key' => 'images',
'value' => [
'https://test.c/images/1.jpg',
'https://test.c/images/2.jpg',
'https://test.c/images/3.jpg',
],
],
],
]);
Expand Down Expand Up @@ -124,12 +133,21 @@ ScrapedDataset::create([
'type' => 'Item-definition-1',
'variant' => '8ed10778a83f1266e7ffed90205f7fb61ddcdf78',
'data' => [
'title' => 'My title',
'body' => 'This is the body content I want to get',
'images' => [
'https://test.c/images/1.jpg',
'https://test.c/images/2.jpg',
'https://test.c/images/3.jpg',
[
'key' => 'title',
'value' => 'My title',
],
[
'key' => 'body',
'value' => 'This is the body content I want to get',
],
[
'key' => 'images',
'value' => [
'https://test.c/images/1.jpg',
'https://test.c/images/2.jpg',
'https://test.c/images/3.jpg',
],
],
],
]);
Expand All @@ -152,12 +170,21 @@ ScrapedDataset::create([
'type' => 'Item-definition-1',
'variant' => '8ed10778a83f1266e7ffed90205f7fb61ddcdf78',
'data' => [
'title' => 'My title',
'body' => regexp('/^Body starts here, but it is so long that.*$/si'),
'images' => [
'https://test.c/images/1.jpg',
'https://test.c/images/2.jpg',
'https://test.c/images/3.jpg',
[
'key' => 'title',
'value' => 'My title',
],
[
'key' => 'body',
'value' => regexp('/^Body starts here, but it is so long that.*$/si'),
],
[
'key' => 'images',
'value' => [
'https://test.c/images/1.jpg',
'https://test.c/images/2.jpg',
'https://test.c/images/3.jpg',
],
],
],
]);
Expand Down
1 change: 1 addition & 0 deletions src/Scraper/Application/Configurator.php
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ private function findConfigByScrapedData(ScrapedDataset $scrapedData, Crawler $c
$result = [];

foreach ($scrapedData['fields'] as $field) {
$field['found'] = $field['found'] ?? true;
if (!$field['found']) {
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Scraper/Models/Configuration.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class Configuration extends Model

/**
 * Accessor returning the `xpaths` attribute as an array.
 *
 * The given value is passed through castAttribute() under the `xpaths` key
 * and the result is coerced with an (array) cast, so the declared array
 * return type always holds.
 *
 * @param mixed $xpaths Value handed to the accessor (presumably the raw stored value — confirm against the model's casts).
 *
 * @return array
 */
public function getXpathsAttribute($xpaths): array
{
    // A single return: the duplicated second return statement was unreachable.
    return (array)$this->castAttribute('xpaths', $xpaths);
}

public function scopeWithType($query, string $type)
Expand Down
7 changes: 7 additions & 0 deletions src/Scraper/Models/ScrapedDataset.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@ class ScrapedDataset extends Model
'fields',
];

/**
 * Register the model's `creating` listener.
 *
 * The listener fills `url_hash` with the SHA-256 digest of the model's
 * `url`, so the hash is generated automatically instead of being supplied
 * by the caller. Any `url_hash` already set on the model is overwritten.
 */
protected static function booted()
{
    $assignUrlHash = function ($dataset) {
        // Derive the hash from the url; nothing is returned to the dispatcher.
        $dataset->url_hash = hash('sha256', $dataset->url);
    };

    static::creating($assignUrlHash);
}

public function scopeWithType($query, string $type)
{
return $query->where('type', $type);
Expand Down

0 comments on commit 8398524

Please sign in to comment.