diff --git a/README.md b/README.md index a6fa99e..9e3037b 100644 --- a/README.md +++ b/README.md @@ -80,12 +80,21 @@ ScrapedDataset::create([ 'url' => 'https://test.c/p/my-objective', 'type' => 'Item-definition-1', 'data' => [ - 'title' => 'My title', - 'body' => 'This is the body content I want to get', - 'images' => [ - 'https://test.c/images/1.jpg', - 'https://test.c/images/2.jpg', - 'https://test.c/images/3.jpg', + [ + 'key' => 'title', + 'value' => 'My title', + ], + [ + 'key' => 'body', + 'value' => 'This is the body content I want to get', + ], + [ + 'key' => 'images', + 'value' => [ + 'https://test.c/images/1.jpg', + 'https://test.c/images/2.jpg', + 'https://test.c/images/3.jpg', + ], ], ], ]); @@ -124,12 +133,21 @@ ScrapedDataset::create([ 'type' => 'Item-definition-1', 'variant' => '8ed10778a83f1266e7ffed90205f7fb61ddcdf78', 'data' => [ - 'title' => 'My title', - 'body' => 'This is the body content I want to get', - 'images' => [ - 'https://test.c/images/1.jpg', - 'https://test.c/images/2.jpg', - 'https://test.c/images/3.jpg', + [ + 'key' => 'title', + 'value' => 'My title', + ], + [ + 'key' => 'body', + 'value' => 'This is the body content I want to get', + ], + [ + 'key' => 'images', + 'value' => [ + 'https://test.c/images/1.jpg', + 'https://test.c/images/2.jpg', + 'https://test.c/images/3.jpg', + ], ], ], ]); @@ -152,12 +170,21 @@ ScrapedDataset::create([ 'type' => 'Item-definition-1', 'variant' => '8ed10778a83f1266e7ffed90205f7fb61ddcdf78', 'data' => [ - 'title' => 'My title', - 'body' => regexp('/^Body starts here, but it is so long that.*$/si'), - 'images' => [ - 'https://test.c/images/1.jpg', - 'https://test.c/images/2.jpg', - 'https://test.c/images/3.jpg', + [ + 'key' => 'title', + 'value' => 'My title', + ], + [ + 'key' => 'body', + 'value' => regexp('/^Body starts here, but it is so long that.*$/si'), + ], + [ + 'key' => 'images', + 'value' => [ + 'https://test.c/images/1.jpg', + 'https://test.c/images/2.jpg', + 'https://test.c/images/3.jpg', + ], ], ], ]); diff --git a/src/Scraper/Application/Configurator.php b/src/Scraper/Application/Configurator.php index 96fc1b6..ea3d7ad 100644 --- a/src/Scraper/Application/Configurator.php +++ b/src/Scraper/Application/Configurator.php @@ -96,6 +96,7 @@ private function findConfigByScrapedData(ScrapedDataset $scrapedData, Crawler $c $result = []; foreach ($scrapedData['fields'] as $field) { + $field['found'] = $field['found'] ?? true; if (!$field['found']) { continue; } diff --git a/src/Scraper/Models/Configuration.php b/src/Scraper/Models/Configuration.php index 20a6fc8..6937b25 100644 --- a/src/Scraper/Models/Configuration.php +++ b/src/Scraper/Models/Configuration.php @@ -46,7 +46,7 @@ class Configuration extends Model public function getXpathsAttribute($xpaths): array { - return (array) $this->castAttribute('xpaths', $xpaths); + return (array)$this->castAttribute('xpaths', $xpaths); } public function scopeWithType($query, string $type) diff --git a/src/Scraper/Models/ScrapedDataset.php b/src/Scraper/Models/ScrapedDataset.php index e992d78..bb5c0ed 100644 --- a/src/Scraper/Models/ScrapedDataset.php +++ b/src/Scraper/Models/ScrapedDataset.php @@ -40,6 +40,13 @@ class ScrapedDataset extends Model 'fields', ]; + protected static function booted() + { + static::creating(function ($model) { + $model->url_hash = hash('sha256', $model->url); + }); + } + public function scopeWithType($query, string $type) { return $query->where('type', $type);