From d8497d2f993f4796acdacff0ac645932db2241b6 Mon Sep 17 00:00:00 2001
From: Wesley Vitor
Date: Thu, 23 Feb 2023 09:44:36 -0300
Subject: [PATCH 1/7] scraper orchestration base #8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/Scraper/domain/value_object.py            |  4 +-
 .../scraper orchestration/orchestrator.py     | 49 +++++++++++++++++++
 2 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 src/Scraper/infrastructure/scraper orchestration/orchestrator.py

diff --git a/src/Scraper/domain/value_object.py b/src/Scraper/domain/value_object.py
index fc6bd24..afa0bfd 100644
--- a/src/Scraper/domain/value_object.py
+++ b/src/Scraper/domain/value_object.py
@@ -1,4 +1,4 @@
-from typing import Tuple
+from typing import Tuple, List
 from dataclasses import dataclass
 from framework.domain.value_object import URL, ValueObject, Money
 from abc import ABC, abstractmethod
@@ -7,5 +7,5 @@
 @dataclass
 class AbstractScraper(ABC, ValueObject):
     @abstractmethod
-    def get_volatile_data(self) -> Tuple[URL, str, Money, int]:
+    def get_volatile_data(self, url: URL) -> Tuple[URL, List[Tuple[str, Money, bool]]]:
         pass
diff --git a/src/Scraper/infrastructure/scraper orchestration/orchestrator.py b/src/Scraper/infrastructure/scraper orchestration/orchestrator.py
new file mode 100644
index 0000000..2619649
--- /dev/null
+++ b/src/Scraper/infrastructure/scraper orchestration/orchestrator.py
@@ -0,0 +1,49 @@
+from time import time
+from typing import List, Tuple
+
+from framework.domain.value_object import URL, Money
+from framework.domain.components import Component
+from Scraper.domain.service import FactoryScraper
+from Scraper.domain.aggragate import VolatileData
+from framework.infrastructure.db_management.db_structure import VolatileDataInstance
+
+seconds_between_requests = 1
+urls: List[URL]
+scraper_factory = FactoryScraper()
+
+
+def convert_to_db_volatile_data(volatile_data: VolatileData) -> VolatileDataInstance:
+    # TODO: convert the 'volatile_data' object into a database instance
+
+    return VolatileDataInstance()
+
+
+def store_volatile_data_on_db(volatile_data: VolatileData):
+    db_volatile_data = convert_to_db_volatile_data(volatile_data)
+
+    # TODO: store the volatile data in the database
+
+
+def store_volatile_datas_on_db(volatile_datas: List[VolatileData]):
+    [store_volatile_data_on_db(volatile_data) for volatile_data in volatile_datas]
+
+
+def run_scrapers():
+    for url in urls:
+        # TODO: implement randomized delays within an interval of seconds
+        scraper = scraper_factory.build_scraper(domain=url.url)
+        current_url: URL = url
+        volatile_data_values: List[Tuple[str, Money, bool]]
+
+        while current_url != None:
+            next_url, volatile_datas_values = scraper.get_volatile_data(current_url)
+
+            for name, price, availability in volatile_datas_values:
+                # TODO: call the search engine to classify the component
+                component: Component
+                # component = SearchEngine.classificate(name)
+                volatile_data: VolatileData
+                # Volatile_data = VolatileData(component_id=component.uid, uid(current_url), current_url, cost, availability)
+                # store_volatile_data_on_db(volatile_data)
+
+            current_url = next_url

From 38777d5804fd02afe6c008d875316c28630276b8 Mon Sep 17 00:00:00 2001
From: Wesley Vitor
Date: Tue, 28 Feb 2023 09:42:08 -0300
Subject: [PATCH 2/7] connection between the scraping orchestrator and the
 database. #8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/Scraper/domain/aggragate.py               | 15 ++--
 src/Scraper/domain/repositories.py            | 70 ++++++++++++++++
 .../ScraperOrchestration/orchestrator.py      | 45 ++++++++++
 .../SQL_alchemy_volatile_data.py              | 83 +++++++++++++++++++
 .../scraper orchestration/orchestrator.py     | 49 -----------
 src/SearchEngine/application/unit_of_work.py  |  2 +-
 src/SearchEngine/domain/repositories.py       |  1 +
 .../SQL_alchemy_repository.py                 |  7 +-
 .../__init__.py                               |  0
 .../component_mapper.py                       | 13 +--
 .../db_management/db_mapping.py               |  7 ++
 .../db_management/db_structure.py             | 10 ++-
 12 files changed, 234 insertions(+), 68 deletions(-)
 create mode 100644 src/Scraper/domain/repositories.py
 create mode 100644 src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
 create mode 100644 src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
 delete mode 100644 src/Scraper/infrastructure/scraper orchestration/orchestrator.py
 rename src/SearchEngine/infrastructure/{component_managment => ComponentManagment}/SQL_alchemy_repository.py (94%)
 rename src/SearchEngine/infrastructure/{component_managment => ComponentManagment}/__init__.py (100%)
 rename src/SearchEngine/infrastructure/{component_managment => ComponentManagment}/component_mapper.py (81%)
 create mode 100644 src/framework/infrastructure/db_management/db_mapping.py

diff --git a/src/Scraper/domain/aggragate.py b/src/Scraper/domain/aggragate.py
index 512b649..1676513 100644
--- a/src/Scraper/domain/aggragate.py
+++ b/src/Scraper/domain/aggragate.py
@@ -1,25 +1,31 @@
 from datetime import datetime
 from dataclasses import dataclass, field
+from typing import List
+
 from framework.domain.entity import AggregateRoot
 from framework.domain.value_object import UUID, Money, URL
-from .entity import MatchesTrackedComponent
+
+_AttrsVolatileData = ["_id", "url", "component_id", "cost", "availability", "timestamp"]
 
 
 @dataclass(kw_only=True)
 class VolatileData(AggregateRoot):
-    component_id: UUID
-    url_id: UUID
+    # url_id: UUID
     url: URL
+    component_id: UUID
     cost: Money
     availability: bool
     timestamp: datetime = field(default=datetime.utcnow())
 
+    @classmethod
+    def get_attrs(cls) -> List[str]:
+        return _AttrsVolatileData.copy()
+
     def generateVolatileDataPoint(
         self,
         _id: UUID,
         component_id: UUID,
-        url_id: UUID,
         url: URL,
         cost: Money,
         availability: bool,
@@ -27,7 +33,6 @@ def generateVolatileDataPoint(
         return VolatileData(
             _id=_id,
             component_id=component_id,
-            url_id=url_id,
             url=url,
             cost=cost,
             availability=availability,
diff --git a/src/Scraper/domain/repositories.py b/src/Scraper/domain/repositories.py
new file mode 100644
index 0000000..e095d4d
--- /dev/null
+++ b/src/Scraper/domain/repositories.py
@@ -0,0 +1,70 @@
+from dataclasses import dataclass, field
+from typing import Dict
+from abc import ABCMeta, abstractmethod
+
+from framework.domain.value_object import UUID
+from framework.domain.repository import AbstractRepository
+from framework.domain.exception import DomainException
+from Scraper.domain.aggragate import VolatileData
+
+
+@dataclass
+class EntityUIDNotFoundException(DomainException):
+    entity_id: UUID
+    _message: str = field(init=False)
+
+    def __post_init__(self):
+        self._message = f"{self.__class__.__name__}: " + f"Component with UID {self.entity_id} does not exist."
+
+
+class MockRepository(AbstractRepository):
+    def __init__(self, volatile_datas: Dict[UUID, VolatileData]):
+        self._volatile_data = volatile_datas
+
+    def _add(self, volatile_data: VolatileData):
+        self._volatile_data[volatile_data.uid] = volatile_data
+
+    def _get_by_uid(self, ref: UUID):
+        ret = self._volatile_data.get(ref, None)
+        if ret:
+            return self._volatile_data[ref]
+        raise EntityUIDNotFoundException(ref)
+
+    def _get(self, **kwargs):
+        qsize = kwargs.get("qsize", 10)
+        ctype = kwargs.get("availability", None)
+
+        ret = list()
+        if ctype:
+            for v in self._volatile_data.values():
+                if v.availability == True:
+                    ret.append(v)
+                if len(ret) == qsize:
+                    break
+
+        return ret
+
+    def __repr__(self):
+        return str(self._volatile_data)
+
+
+class ISQLAlchemyRepository(AbstractRepository, metaclass=ABCMeta):
+    @abstractmethod
+    def __init__(self, session):
+        raise NotImplementedError
+
+    @abstractmethod
+    def _add(self, volatile_data: VolatileData):
+        raise NotImplementedError
+
+    @abstractmethod
+    def _get_by_uid(self, ref: UUID):
+        raise NotImplementedError
+
+    @abstractmethod
+    def _get(self, **kwargs):
+        raise NotImplementedError
+
+    def __repr__(self):
+        raise NotImplementedError
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py b/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
new file mode 100644
index 0000000..3545d4c
--- /dev/null
+++ b/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
@@ -0,0 +1,45 @@
+from time import time
+from typing import List, Tuple
+
+from framework.domain.value_object import UUID
+from framework.domain.value_object import URL
+from framework.domain.components import Component
+from Scraper.domain.service import FactoryScraper
+from Scraper.domain.aggragate import VolatileData
+from Scraper.infrastructure.VolatileDataManagment.SQL_alchemy_volatile_data import (
+    SQLAlchemyVolatile_data,
+)
+
+seconds_between_requests = 1
+urls: List[URL] = [URL("www.google.com", "", "", "")]  # category
+scraper_factory = FactoryScraper()
+
+
+def run_scrapers(session):
+    volatile_data_manager = SQLAlchemyVolatile_data(session)
+
+    for category_url in urls:
+        # TODO: implement randomized delays within an interval of seconds
+        scraper = scraper_factory.build_scraper(domain=category_url.url)
+        page_url: URL = category_url
+
+        while page_url != None:
+            page_url, volatile_datas_values = scraper.get_volatile_data(page_url)
+
+            for url, name, cost, availability in volatile_datas_values:
+                # TODO: call the search engine to classify the component
+                # component = SearchEngine.classifie(name)
+
+                component = Component(
+                    _id=Component.next_id(), manufacturer="1", model="2"
+                )  # placeholder
+
+                volatile_data = VolatileData(
+                    _id=UUID(url.url),
+                    component_id=component.uid,
+                    url=url,
+                    cost=cost,
+                    availability=availability,
+                )
+
+                volatile_data_manager.add(volatile_data)
diff --git a/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py b/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
new file mode 100644
index 0000000..a8ddc38
--- /dev/null
+++ b/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
@@ -0,0 +1,83 @@
+from sqlalchemy.orm.session import Session
+from sqlalchemy.engine import Row
+
+from framework.domain.events import DomainEvent
+from Scraper.domain.aggragate import VolatileData
+from framework.domain.value_object import UUID
+from framework.infrastructure.db_management.db_mapping import map_from_to
+from framework.infrastructure.db_management.db_structure import (
+    VolatileDataInstance,
+    AttrsVolatileData,
+)
+from sqlalchemy.exc import NoResultFound
+from Scraper.domain.repositories import (
+    ISQLAlchemyRepository,
+    EntityUIDNotFoundException,
+)
+
+
+class SQLAlchemyVolatile_data(ISQLAlchemyRepository):
+    def __init__(self, session):
+        self._session: Session = session
+
+    def volatile_data_to_db_object(
+        self, volatile_data: VolatileData
+    ) -> VolatileDataInstance:
+        mapped_vol_data = map_from_to(
+            volatile_data, VolatileData.get_attrs(), AttrsVolatileData
+        )
+
+        return VolatileDataInstance(**mapped_vol_data)
+
+    def db_object_to_volatile_data(
+        self, volatile_data_instance: VolatileDataInstance
+    ) -> VolatileData:
+        mapped_vol_data = map_from_to(
+            volatile_data_instance, AttrsVolatileData, VolatileData.get_attrs()
+        )
+
+        return VolatileData(**mapped_vol_data)
+
+    def _get_instance_by_uid(self, ref: UUID) -> VolatileDataInstance:
+        query_filter = [VolatileDataInstance.url_id == ref]
+
+        try:
+            ctype: Row = (
+                self._session.query(VolatileDataInstance.type)
+                .filter(*query_filter)
+                .one()
+            )
+
+            vol_data_inst: VolatileDataInstance = (
+                self._session.query(VolatileDataInstance).filter(*query_filter).one()
+            )
+
+        except NoResultFound:
+            raise EntityUIDNotFoundException(ref)
+
+        return vol_data_inst
+
+    def _add(self, volatile_data: VolatileData):
+        db_volatile_data: VolatileDataInstance = self.volatile_data_to_db_object(
+            volatile_data
+        )
+
+        try:
+            current_volatile_data = self._get_instance_by_uid(volatile_data.uid)
+
+            if current_volatile_data.cost > db_volatile_data.cost:
+                # TODO emit a price-reduction event
+                pass
+
+            current_volatile_data.__dict__.update(db_volatile_data.__dict__)
+
+        except NoResultFound:
+            self._session.add(db_volatile_data)
+
+        self._session.commit()
+
+    def _get(self, **kwargs):
+        return super()._get(**kwargs)
+
+    def _get_by_uid(self, ref: UUID):
+        return super()._get_by_uid(ref)
diff --git a/src/Scraper/infrastructure/scraper orchestration/orchestrator.py b/src/Scraper/infrastructure/scraper orchestration/orchestrator.py
deleted file mode 100644
index 2619649..0000000
--- a/src/Scraper/infrastructure/scraper orchestration/orchestrator.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from time import time
-from typing import List, Tuple
-
-from framework.domain.value_object import URL, Money
-from framework.domain.components import Component
-from Scraper.domain.service import FactoryScraper
-from Scraper.domain.aggragate import VolatileData
-from framework.infrastructure.db_management.db_structure import VolatileDataInstance
-
-seconds_between_requests = 1
-urls: List[URL]
-scraper_factory = FactoryScraper()
-
-
-def convert_to_db_volatile_data(volatile_data: VolatileData) -> VolatileDataInstance:
-    # TODO: convert the 'volatile_data' object into a database instance
-
-    return VolatileDataInstance()
-
-
-def store_volatile_data_on_db(volatile_data: VolatileData):
-    db_volatile_data = convert_to_db_volatile_data(volatile_data)
-
-    # TODO: store the volatile data in the database
-
-
-def store_volatile_datas_on_db(volatile_datas: List[VolatileData]):
-    [store_volatile_data_on_db(volatile_data) for volatile_data in volatile_datas]
-
-
-def run_scrapers():
-    for url in urls:
-        # TODO: implement randomized delays within an interval of seconds
-        scraper = scraper_factory.build_scraper(domain=url.url)
-        current_url: URL = url
-        volatile_data_values: List[Tuple[str, Money, bool]]
-
-        while current_url != None:
-            next_url, volatile_datas_values = scraper.get_volatile_data(current_url)
-
-            for name, price, availability in volatile_datas_values:
-                # TODO: call the search engine to classify the component
-                component: Component
-                # component = SearchEngine.classificate(name)
-                volatile_data: VolatileData
-                # Volatile_data = VolatileData(component_id=component.uid, uid(current_url), current_url, cost, availability)
-                # store_volatile_data_on_db(volatile_data)
-
-            current_url = next_url
diff --git a/src/SearchEngine/application/unit_of_work.py b/src/SearchEngine/application/unit_of_work.py
index 175a853..3df9b8c 100644
--- a/src/SearchEngine/application/unit_of_work.py
+++ b/src/SearchEngine/application/unit_of_work.py
@@ -1,7 +1,7 @@
 from framework.application.uow import AbstractUnitOfWork
 from ..domain.repositories import MockRepository
 
-from ..infrastructure.component_managment.SQL_alchemy_repository import (
+from ..infrastructure.ComponentManagment.SQL_alchemy_repository import (
     SQLAlchemyRepository,
 )
diff --git a/src/SearchEngine/domain/repositories.py b/src/SearchEngine/domain/repositories.py
index 5addb34..f60315a 100644
--- a/src/SearchEngine/domain/repositories.py
+++ b/src/SearchEngine/domain/repositories.py
@@ -6,6 +6,7 @@
 from framework.domain.repository import AbstractRepository
 from framework.domain.exception import DomainException
 from framework.domain.components import Component
+from Scraper.domain.aggragate import VolatileData
 
 
 @dataclass
diff --git a/src/SearchEngine/infrastructure/component_managment/SQL_alchemy_repository.py b/src/SearchEngine/infrastructure/ComponentManagment/SQL_alchemy_repository.py
similarity index 94%
rename from src/SearchEngine/infrastructure/component_managment/SQL_alchemy_repository.py
rename to src/SearchEngine/infrastructure/ComponentManagment/SQL_alchemy_repository.py
index c276f80..cca7866 100644
--- a/src/SearchEngine/infrastructure/component_managment/SQL_alchemy_repository.py
+++ b/src/SearchEngine/infrastructure/ComponentManagment/SQL_alchemy_repository.py
@@ -15,7 +15,7 @@
     ComponentInstance,
     component_inst_idx,
 )
-from SearchEngine.infrastructure.component_managment.component_mapper import *
+from SearchEngine.infrastructure.ComponentManagment.component_mapper import *
 
 
 class SQLAlchemyRepository(ISQLAlchemyRepository):
@@ -41,11 +41,14 @@ def _get_by_uid(self, ref: UUID) -> Component:
             self._session.query(ComponentInstance.type).filter(*query_filter).one()
         )
 
-        component: Component = (
+        component_inst: ComponentInstance = (
             self._session.query(component_inst_idx[ctype[0]])
             .filter(*query_filter)
             .one()
         )
+
+        component = bd_object_to_component(component_inst)
+
         except NoResultFound:
             raise EntityUIDNotFoundException(ref)
diff --git a/src/SearchEngine/infrastructure/component_managment/__init__.py b/src/SearchEngine/infrastructure/ComponentManagment/__init__.py
similarity index 100%
rename from src/SearchEngine/infrastructure/component_managment/__init__.py
rename to src/SearchEngine/infrastructure/ComponentManagment/__init__.py
diff --git a/src/SearchEngine/infrastructure/component_managment/component_mapper.py b/src/SearchEngine/infrastructure/ComponentManagment/component_mapper.py
similarity index 81%
rename from src/SearchEngine/infrastructure/component_managment/component_mapper.py
rename to src/SearchEngine/infrastructure/ComponentManagment/component_mapper.py
index 08fa114..1207725 100644
--- a/src/SearchEngine/infrastructure/component_managment/component_mapper.py
+++ b/src/SearchEngine/infrastructure/ComponentManagment/component_mapper.py
@@ -1,5 +1,6 @@
 from typing import List
 from framework.domain.components import *
+from framework.infrastructure.db_management.db_mapping import map_from_to
 from framework.infrastructure.db_management.db_structure import (
     ComponentInstance,
     component_inst_idx,
@@ -20,18 +21,10 @@ def _get_attrs_from(c_type: EComponentType):
     return comp_attrs, comp_inst_attrs
 
 
-def _map_from_to(
-    component: Component | ComponentInstance, from_attrs: List, to_attrs: List
-) -> dict:
-    mapped = {t: getattr(component, f) for t, f in zip(to_attrs, from_attrs)}
-
-    return mapped
-
-
 def component_to_bd_object(component: Component) -> ComponentInstance:
     specific_inst_cls = component_inst_idx[component.type.value]
     comp_attrs, comp_inst_attrs = _get_attrs_from(component.type)
-    mapped_comp_dict = _map_from_to(component, comp_attrs, comp_inst_attrs)
+    mapped_comp_dict = map_from_to(component, comp_attrs, comp_inst_attrs)
 
     return specific_inst_cls(**mapped_comp_dict)
 
@@ -41,7 +34,7 @@ def bd_object_to_component(component_instance: ComponentInstance) -> Component:
     comp_attrs, comp_inst_attrs = _get_attrs_from(
         EComponentType(component_instance.type)
     )
-    mapped_comp_dict = _map_from_to(component_instance, comp_inst_attrs, comp_attrs)
+    mapped_comp_dict = map_from_to(component_instance, comp_inst_attrs, comp_attrs)
 
     return specific_comp_cls(**mapped_comp_dict)
diff --git a/src/framework/infrastructure/db_management/db_mapping.py b/src/framework/infrastructure/db_management/db_mapping.py
new file mode 100644
index 0000000..6f3e1ba
--- /dev/null
+++ b/src/framework/infrastructure/db_management/db_mapping.py
@@ -0,0 +1,7 @@
+from typing import List
+
+
+def map_from_to(original_object: object, from_attrs: List, to_attrs: List) -> dict:
+    mapped = {t: getattr(original_object, f) for t, f in zip(to_attrs, from_attrs)}
+
+    return mapped
diff --git a/src/framework/infrastructure/db_management/db_structure.py b/src/framework/infrastructure/db_management/db_structure.py
index 52688d2..ec777fb 100644
--- a/src/framework/infrastructure/db_management/db_structure.py
+++ b/src/framework/infrastructure/db_management/db_structure.py
@@ -53,10 +53,18 @@ class VolatileDataInstance(base):
     url_id = Column(BinaryUUID, primary_key=True)
     url = Column(VARCHAR(255))
     component_uid = Column(BinaryUUID, ForeignKey(ComponentInstance.uid))
-    price = Column(FLOAT(7, 2, False))
+    cost = Column(FLOAT(7, 2, False))
     availability = Column(BOOLEAN())
     timestamp = Column(DATETIME(timezone=False, fsp=0))
 
+AttrsVolatileData = [
+    'url_id',
+    'url',
+    'component_uid',
+    'cost',
+    'availability',
+    'timestamp',
+]
 
 class PriceHistoryInstance(base):
     __tablename__ = "prices_history"

From 62c938812f9ae0e4c19d10d1dc976ab733a05085 Mon Sep 17 00:00:00 2001
From: Wesley Vitor
Date: Tue, 28 Feb 2023 09:46:01 -0300
Subject: [PATCH 3/7] lint compliance. #8
---
 .../infrastructure/db_management/db_structure.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/framework/infrastructure/db_management/db_structure.py b/src/framework/infrastructure/db_management/db_structure.py
index ec777fb..1c9d47f 100644
--- a/src/framework/infrastructure/db_management/db_structure.py
+++ b/src/framework/infrastructure/db_management/db_structure.py
@@ -57,15 +57,17 @@ class VolatileDataInstance(base):
     availability = Column(BOOLEAN())
     timestamp = Column(DATETIME(timezone=False, fsp=0))
 
+
 AttrsVolatileData = [
-    'url_id',
-    'url',
-    'component_uid',
-    'cost',
-    'availability',
-    'timestamp',
+    "url_id",
+    "url",
+    "component_uid",
+    "cost",
+    "availability",
+    "timestamp",
 ]
 
+
 class PriceHistoryInstance(base):
     __tablename__ = "prices_history"

From d96ad97275b42c1e16c908569bb3b3d5fabf615d Mon Sep 17 00:00:00 2001
From: Wesley Vitor
Date: Wed, 1 Mar 2023 23:25:40 -0300
Subject: [PATCH 4/7] scraping orchestrator implementation. #8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/Scraper/domain/category_url.py            |  14 +++
 src/Scraper/domain/value_object.py            |   6 +-
 .../ScraperOrchestration/Wrapper.py           |  71 ++++++++++++
 .../category_URL_manager.py                   | 104 ++++++++++++++++++
 .../ScraperOrchestration/orchestrator.py      |  64 +++++------
 .../SQL_alchemy_volatile_data.py              |  22 ++--
 src/framework/domain/entity.py                |   3 +-
 .../db_management/db_structure.py             |   3 +-
 8 files changed, 237 insertions(+), 50 deletions(-)
 create mode 100644 src/Scraper/domain/category_url.py
 create mode 100644 src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py
 create mode 100644 src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py

diff --git a/src/Scraper/domain/category_url.py b/src/Scraper/domain/category_url.py
new file mode 100644
index 0000000..a1cc6ac
--- /dev/null
+++ b/src/Scraper/domain/category_url.py
@@ -0,0 +1,14 @@
+from dataclasses import dataclass
+
+from framework.domain.entity import Entity
+from framework.domain.value_object import URL
+from framework.domain.components import EComponentType
+
+
+@dataclass
+class CategoryURL(Entity):
+    url: URL
+    category: EComponentType
+
+    def __hash__(self):
+        return hash(self.uid)
diff --git a/src/Scraper/domain/value_object.py b/src/Scraper/domain/value_object.py
index afa0bfd..9c0807d 100644
--- a/src/Scraper/domain/value_object.py
+++ b/src/Scraper/domain/value_object.py
@@ -1,5 +1,7 @@
 from typing import Tuple, List
 from dataclasses import dataclass
+
+from framework.domain.components import EComponentType
 from framework.domain.value_object import URL, ValueObject, Money
 from abc import ABC, abstractmethod
 
@@ -7,5 +9,7 @@
 @dataclass
 class AbstractScraper(ABC, ValueObject):
     @abstractmethod
-    def get_volatile_data(self, url: URL) -> Tuple[URL, List[Tuple[str, Money, bool]]]:
+    def get_volatile_data(
+        self, url: str
+    ) -> Tuple[URL, List[Tuple[URL, str, Money, bool]]]:
         pass
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py b/src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py
new file mode 100644
index 0000000..842bb3f
--- /dev/null
+++ b/src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py
@@ -0,0 +1,71 @@
+from sqlalchemy.orm.session import Session
+from typing import List
+from random import uniform
+import asyncio
+
+from SearchEngine.infrastructure.ComponentManagment.SQL_alchemy_repository import (
+    SQLAlchemyRepository,
+)
+from Scraper.domain.value_object import AbstractScraper, URL
+from Scraper.domain.category_url import CategoryURL
+from Scraper.domain.aggragate import VolatileData
+from Scraper.domain.service import FactoryScraper
+from framework.domain.value_object import UUID
+from framework.domain.components import Component
+from entrypoints.api.endpoints.connection_util import engine
+from framework.infrastructure.db_management.db_connection import create_session
+from Scraper.infrastructure.VolatileDataManagment.SQL_alchemy_volatile_data import (
+    SQLAlchemyVolatileData,
+)
+from Scraper.infrastructure.ScraperOrchestration.category_URL_manager import (
+    CategoryURLManager,
+)
+
+
+class Wrapper:
+    _volatile_data_manager: SQLAlchemyVolatileData
+    domain: str
+    scraper: AbstractScraper
+    domain_urls: List[CategoryURL]
+    session: Session
+
+    max_sleep_seconds = 3
+
+    def __init__(self, domain: str):
+        self.domain = domain
+        self.session = create_session(engine)
+        self._volatile_data_manager = SQLAlchemyVolatileData(self.session)
+
+        factory_scraper = FactoryScraper()
+        url_manager = CategoryURLManager(self.session)
+        self.scraper = factory_scraper.build_scraper(domain)
+        self.domain_urls = url_manager.get(filters_eq={"domain": domain})
+
+    async def run_scraping(self):
+        for domain_url in self.domain_urls:
+            next_url: URL = domain_url.url
+            while next_url != None:
+                next_url, volatile_datas = self.scraper.get_volatile_data(next_url.url)
+
+                for url, name, cost, availability in volatile_datas:
+                    # TODO: call the search engine to classify the component
+                    # component = SearchEngine.classifie(name)
+                    component_manager = SQLAlchemyRepository(
+                        self.session
+                    )  # placeholder
+                    component = component_manager.get(filters_gt={"consumption": -1})[
+                        0
+                    ]  # placeholder
+
+                    volatile_data = VolatileData(
+                        _id=UUID(url.url),
+                        component_id=component.uid,
+                        url=url,
+                        cost=cost,
+                        availability=availability,
+                    )
+
+                    self._volatile_data_manager.add(volatile_data)
+
+            sleep_seconds = uniform(1.0, self.max_sleep_seconds)
+            await asyncio.sleep(sleep_seconds)
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py b/src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py
new file mode 100644
index 0000000..27f3807
--- /dev/null
+++ b/src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py
@@ -0,0 +1,104 @@
+from sqlalchemy.orm.session import Session
+from sqlalchemy.exc import NoResultFound
+from typing import List
+from operator import lt, gt, eq
+
+from framework.domain.value_object import UUID
+from framework.infrastructure.db_management.db_structure import CategoryUrlInstance
+from Scraper.domain.category_url import CategoryURL
+from framework.domain.value_object import URL
+from framework.domain.components import EComponentType
+from Scraper.domain.repositories import (
+    ISQLAlchemyRepository,
+    EntityUIDNotFoundException,
+)
+
+
+class CategoryURLManager(ISQLAlchemyRepository):
+    _filters_ops: dict = {"filters_eq": eq, "filters_lt": lt, "filters_gt": gt}
+
+    def __init__(self, session):
+        self._session: Session = session
+
+    def _url_to_db(self, category_url: CategoryURL):
+        url_instance = CategoryUrlInstance()
+        url = category_url.url
+
+        url_instance.uid = category_url.uid
+        url_instance.domain = url.domain
+        url_instance.path = url.path
+        url_instance.scheme = url.scheme
+        url_instance.type = category_url.category
+
+        return url_instance
+
+    def _db_to_category_url(self, url_instance: CategoryUrlInstance):
+        url_str = f"{url_instance.scheme}://{url_instance.domain}/{url_instance.path}"
+        url = URL(url_str, url_instance.scheme, url_instance.domain, url_instance.path)
+        category_url = CategoryURL(
+            _id=url_instance.uid, url=url, category=url_instance.type
+        )
+
+        return category_url
+
+    def _parse_filters(self, **kwargs) -> List:
+        ret = []
+
+        for filter_type, filters in kwargs.items():
+            if filter_type in self._filters_ops.keys():
+                op = self._filters_ops[filter_type]
+
+                [
+                    ret.append(op(getattr(CategoryUrlInstance, prop), value))
+                    for prop, value in filters.items()
+                ]
+
+        return ret
+
+    def _filter_components_from_db(self, filters: List) -> List[CategoryURL]:
+        url_instances: List[CategoryUrlInstance] = (
+            self._session.query(CategoryUrlInstance).filter(*filters).all()
+        )
+
+        urls = [self._db_to_category_url(instance) for instance in url_instances]
+
+        return urls
+
+    def _add(self, category_url: CategoryURL):
+        url_instance = self._url_to_db(category_url)
+        self._session.add(url_instance)
+        self._session.commit()
+
+    def _get(self, **kwargs) -> List[CategoryURL]:
+        ret = []
+
+        filters = self._parse_filters(**kwargs)
+
+        urls = self._filter_components_from_db(filters)
+        ret.extend(urls)
+
+        return ret
+
+    def _get_by_uid(self, ref: UUID):
+        query_filter = [CategoryUrlInstance.uid == ref]
+
+        try:
+            category_url: CategoryUrlInstance = (
+                self._session.query(CategoryUrlInstance).filter(*query_filter).one()
+            )
+
+            url = self._db_to_category_url(category_url)
+
+        except NoResultFound:
+            raise EntityUIDNotFoundException(ref)
+
+        return url
+
+    def get_domains(self) -> List[str]:
+        query = self._session.query(CategoryUrlInstance.domain).distinct(
+            CategoryUrlInstance.domain
+        )
+        domains = [domain[0] for domain in query]
+
+        return domains
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py b/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
index 3545d4c..e982b39 100644
--- a/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
+++ b/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
@@ -1,45 +1,39 @@
-from time import time
-from typing import List, Tuple
-
-from framework.domain.value_object import UUID
-from framework.domain.value_object import URL
-from framework.domain.components import Component
-from Scraper.domain.service import FactoryScraper
-from Scraper.domain.aggragate import VolatileData
-from Scraper.infrastructure.VolatileDataManagment.SQL_alchemy_volatile_data import (
-    SQLAlchemyVolatile_data,
+import sys
+
+sys.path.insert(0, r"C:\Users\wesle\OneDrive\Documentos\UFPI\ESII\WiseBuilder\src")
+
+import asyncio
+from Scraper.infrastructure.ScraperOrchestration.category_URL_manager import (
+    CategoryURLManager,
 )
+from framework.infrastructure.db_management.db_connection import create_session
+from entrypoints.api.endpoints.connection_util import engine
+from Scraper.infrastructure.ScraperOrchestration.Wrapper import Wrapper
+
+_category_url_manager = CategoryURLManager(create_session(engine))
+_sleep_minutes = 0.1
 
-seconds_between_requests = 1
-urls: List[URL] = [URL("www.google.com", "", "", "")]  # category
-scraper_factory = FactoryScraper()
+async def run_scrapers():
+    while True:
+        domains = _category_url_manager.get_domains()
 
-def run_scrapers(session):
-    volatile_data_manager = SQLAlchemyVolatile_data(session)
+        tasks = []
 
-    for category_url in urls:
-        # TODO: implement randomized delays within an interval of seconds
-        scraper = scraper_factory.build_scraper(domain=category_url.url)
-        page_url: URL = category_url
+        for domain in domains:
+            wrapper = Wrapper(domain)
+            tasks.append(wrapper.run_scraping())
 
-        while page_url != None:
-            page_url, volatile_datas_values = scraper.get_volatile_data(page_url)
+        await asyncio.gather(*tasks)
+        await asyncio.sleep(_sleep_minutes * 60)
 
-            for url, name, cost, availability in volatile_datas_values:
-                # TODO: call the search engine to classify the component
-                # component = SearchEngine.classifie(name)
 
-                component = Component(
-                    _id=Component.next_id(), manufacturer="1", model="2"
-                )  # placeholder
+def main():
+    orchestrator_loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(orchestrator_loop)
+    orchestrator_loop.run_until_complete(run_scrapers())
+    orchestrator_loop.close()
 
-                volatile_data = VolatileData(
-                    _id=UUID(url.url),
-                    component_id=component.uid,
-                    url=url,
-                    cost=cost,
-                    availability=availability,
-                )
 
-                volatile_data_manager.add(volatile_data)
+if __name__ == "__main__":
+    main()
diff --git a/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py b/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
index a8ddc38..bc31a9b 100644
--- a/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
+++ b/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
@@ -1,7 +1,6 @@
 from sqlalchemy.orm.session import Session
-from sqlalchemy.engine import Row
+from sqlalchemy.exc import NoResultFound
 
-from framework.domain.events import DomainEvent
 from Scraper.domain.aggragate import VolatileData
 from framework.domain.value_object import UUID
 from framework.infrastructure.db_management.db_mapping import map_from_to
@@ -9,14 +8,13 @@
     VolatileDataInstance,
     AttrsVolatileData,
 )
-from sqlalchemy.exc import NoResultFound
 from Scraper.domain.repositories import (
     ISQLAlchemyRepository,
     EntityUIDNotFoundException,
 )
 
 
-class SQLAlchemyVolatile_data(ISQLAlchemyRepository):
+class SQLAlchemyVolatileData(ISQLAlchemyRepository):
     def __init__(self, session):
         self._session: Session = session
 
@@ -42,12 +40,6 @@ def _get_instance_by_uid(self, ref: UUID) -> VolatileDataInstance:
         query_filter = [VolatileDataInstance.url_id == ref]
 
         try:
-            ctype: Row = (
-                self._session.query(VolatileDataInstance.type)
-                .filter(*query_filter)
-                .one()
-            )
-
             vol_data_inst: VolatileDataInstance = (
                 self._session.query(VolatileDataInstance).filter(*query_filter).one()
             )
@@ -65,7 +57,10 @@ def _add(self, volatile_data: VolatileData):
         try:
             current_volatile_data = self._get_instance_by_uid(volatile_data.uid)
 
-            if current_volatile_data.cost > db_volatile_data.cost:
+            if (
+                current_volatile_data.cost > db_volatile_data.cost
+                and db_volatile_data.availability
+            ):
                 # TODO emit a price-reduction event
                 pass
 
@@ -80,4 +75,7 @@ def _get(self, **kwargs):
         return super()._get(**kwargs)
 
     def _get_by_uid(self, ref: UUID):
-        return super()._get_by_uid(ref)
+        volatile_data_instance = self._get_instance_by_uid(ref)
+        volatile_data = self.db_object_to_volatile_data(volatile_data_instance)
+
+        return volatile_data
diff --git a/src/framework/domain/entity.py b/src/framework/domain/entity.py
index 67a5e29..5e456be 100644
--- a/src/framework/domain/entity.py
+++ b/src/framework/domain/entity.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass, field
 from typing import List, Union
 
-from .value_object import UUID, UUIDv4
+from .value_object import UUID, UUIDv4, ValueObject
 from .rule import BussinessAssertionExtension
 from .events import DomainEvent
@@ -33,3 +33,4 @@ class AggregateRoot(BussinessAssertionExtension, Entity):
 
 
 UniqueObject = Union[Entity, AggregateRoot]
+DomainObject = Union[ValueObject, UniqueObject]
diff --git a/src/framework/infrastructure/db_management/db_structure.py b/src/framework/infrastructure/db_management/db_structure.py
index 1c9d47f..2d76b93 100644
--- a/src/framework/infrastructure/db_management/db_structure.py
+++ b/src/framework/infrastructure/db_management/db_structure.py
@@ -222,7 +222,8 @@ class ComputerInstance(base):
 
 class CategoryUrlInstance(base):
     __tablename__ = "category_url"
-    uid = Column(INTEGER(5), primary_key=True, autoincrement=False)
+    uid = Column(BinaryUUID, primary_key=True, autoincrement=False)
+    scheme = Column(VARCHAR(8))
     domain = Column(VARCHAR(100))
     path = Column(VARCHAR(150))
    type = Column(ENUM(EComponentType))

From a8c01ac6a3e0959080f88beb924af1edc8870a45 Mon Sep 17 00:00:00 2001
From: Wesley Vitor
Date: Thu, 2 Mar 2023 01:19:49 -0300
Subject: [PATCH 5/7] integration with the Kabum store scraper. #8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/Scraper/domain/aggragate.py                    |  3 +++
 src/Scraper/domain/scrapers.py                     |  5 ++++-
 src/Scraper/domain/service.py                      |  7 +++++--
 .../infrastructure/ScraperOrchestration/Wrapper.py | 14 ++++++++------
 .../ScraperOrchestration/category_URL_manager.py   | 11 +++++------
 .../ScraperOrchestration/orchestrator.py           |  8 ++++----
 .../SQL_alchemy_volatile_data.py                   | 10 +++++++---
 src/framework/domain/value_object.py               |  4 +++-
 8 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/src/Scraper/domain/aggragate.py b/src/Scraper/domain/aggragate.py
index 1676513..9571c2b 100644
--- a/src/Scraper/domain/aggragate.py
+++ b/src/Scraper/domain/aggragate.py
@@ -18,6 +18,9 @@ class VolatileData(AggregateRoot):
     timestamp: datetime = field(default=datetime.utcnow())
 
+    def __hash__(self):
+        return hash(self.uid)
+
     @classmethod
     def get_attrs(cls) -> List[str]:
         return _AttrsVolatileData.copy()
diff --git a/src/Scraper/domain/scrapers.py b/src/Scraper/domain/scrapers.py
index 97dffec..f5c4b5c 100644
--- a/src/Scraper/domain/scrapers.py
+++ b/src/Scraper/domain/scrapers.py
@@ -62,11 +62,14 @@ def get_volatile_data(
 
         n_next_page = n_actual_page + 1
 
+        next_url: URL | None = None
+
         if n_next_page in number_of_pages:
             next_page = url.split("?")[0] + self.query_string.format(
                 page_number=n_next_page
             )
+            next_url = URL.get_URL(next_page)
         else:
             next_page = None
 
-        return next_page, volatile_data
+        return next_url, volatile_data
diff --git a/src/Scraper/domain/service.py b/src/Scraper/domain/service.py
index 135a3fe..8cd2526 100644
--- a/src/Scraper/domain/service.py
+++ b/src/Scraper/domain/service.py
@@ -1,11 +1,14 @@
 from dataclasses import dataclass, field
 from .value_object import AbstractScraper
-from typing import Dict, Union, NoReturn
+from typing import Dict, Union, NoReturn, Type
+from Scraper.domain.scrapers import KabumScraper
 
 
 @dataclass
 class FactoryScraper:
-    _scrapers: Dict[str, AbstractScraper] = field(init=False, default_factory=dict)
+    _scrapers: Dict[str, AbstractScraper] = field(
+        init=False, default_factory=lambda: {KabumScraper.raw_url: KabumScraper()}
+    )
 
     def build_scraper(self, domain: str) -> Union[AbstractScraper, NoReturn]:
         _scraper: AbstractScraper | None = self._scrapers.get(domain)
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py b/src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py
index 842bb3f..e620aa8 100644
--- a/src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py
+++ b/src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py
@@ -10,8 +10,7 @@
 from Scraper.domain.category_url import CategoryURL
 from Scraper.domain.aggragate import VolatileData
 from Scraper.domain.service import FactoryScraper
-from framework.domain.value_object import UUID
-from framework.domain.components import Component
+from framework.domain.value_object import UUIDv5
 from entrypoints.api.endpoints.connection_util import engine
 from framework.infrastructure.db_management.db_connection import create_session
 from Scraper.infrastructure.VolatileDataManagment.SQL_alchemy_volatile_data import (
@@ -31,21 +30,24 @@ class Wrapper:
 
     max_sleep_seconds = 3
 
-    def __init__(self, domain: str):
+    def __init__(self, scheme: str, domain: str):
         self.domain = domain
         self.session = create_session(engine)
         self._volatile_data_manager = SQLAlchemyVolatileData(self.session)
 
         factory_scraper = FactoryScraper()
         url_manager = CategoryURLManager(self.session)
-        self.scraper = factory_scraper.build_scraper(domain)
+        self.scraper = factory_scraper.build_scraper(f"{scheme}://{domain}")
         self.domain_urls = url_manager.get(filters_eq={"domain": domain})
 
     async def run_scraping(self):
         for domain_url in self.domain_urls:
             next_url: URL = domain_url.url
+
             while next_url != None:
-                next_url, volatile_datas = self.scraper.get_volatile_data(next_url.url)
+                next_url, volatile_datas = self.scraper.get_volatile_data(
+                    url=next_url.url
+                )
 
                 for url, name, cost, availability in volatile_datas:
                     # TODO: call the search engine to classify the component
@@ -58,7 +60,7 @@ async def run_scraping(self):
                     ]  # placeholder
 
                     volatile_data = VolatileData(
-                        _id=UUID(url.url),
+                        _id=UUIDv5(url.url),
                         component_id=component.uid,
                         url=url,
                         cost=cost,
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py b/src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py
index 27f3807..25797ce 100644
--- a/src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py
+++ b/src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py
@@ -95,10 +95,9 @@ def _get_by_uid(self, ref: UUID):
 
         return url
 
-    def get_domains(self) -> List[str]:
-        query = self._session.query(CategoryUrlInstance.domain).distinct(
-            CategoryUrlInstance.domain
-        )
-        domains = [domain[0] for domain in query]
+    def get_urls(self) -> List[tuple[str, str]]:
+        params = [CategoryUrlInstance.scheme, CategoryUrlInstance.domain]
 
-        return domains
+        query = self._session.query(*params).distinct(CategoryUrlInstance.domain)
+        urls = [url for url in query]
+        return urls
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py b/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
index e982b39..a305dd1 100644
--- a/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
+++ b/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
@@ -11,17 +11,17 @@
 from Scraper.infrastructure.ScraperOrchestration.Wrapper import Wrapper
 
 _category_url_manager = CategoryURLManager(create_session(engine))
-_sleep_minutes = 0.1
+_sleep_minutes = 60
 
 
 async def run_scrapers():
     while True:
-        domains = _category_url_manager.get_domains()
+        urls = _category_url_manager.get_urls()
 
         tasks = []
 
-        for domain in domains:
-            wrapper = Wrapper(domain)
+        for scheme, domain in urls:
+            wrapper = Wrapper(scheme, domain)
             tasks.append(wrapper.run_scraping())
 
         await asyncio.gather(*tasks)
diff --git a/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py b/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
index bc31a9b..68d1dda 100644
--- a/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
+++ b/src/Scraper/infrastructure/VolatileDataManagment/SQL_alchemy_volatile_data.py
@@ -1,5 +1,6 @@
 from sqlalchemy.orm.session import Session
 from sqlalchemy.exc import NoResultFound
+from sqlalchemy import update
 
 from Scraper.domain.aggragate import VolatileData
 from framework.domain.value_object import UUID
@@ -54,19 +55,22 @@ def _add(self, volatile_data: VolatileData):
             volatile_data
         )
 
+        db_volatile_data.url = volatile_data.url.url  # TODO modify the dictionary
+        db_volatile_data.cost = volatile_data.cost.amount
+
         try:
             current_volatile_data = self._get_instance_by_uid(volatile_data.uid)
 
             if (
-                current_volatile_data.cost > db_volatile_data.cost
+                db_volatile_data.cost + 0.1 < current_volatile_data.cost
                 and db_volatile_data.availability
             ):
                 # TODO emit a price-reduction event
                 pass
 
-            current_volatile_data.__dict__.update(db_volatile_data.__dict__)
+            current_volatile_data.cost = db_volatile_data.cost
 
-        except NoResultFound:
+        except EntityUIDNotFoundException:
             self._session.add(db_volatile_data)
 
         self._session.commit()
diff --git a/src/framework/domain/value_object.py b/src/framework/domain/value_object.py
index fe78abf..8fd63a7 100644
--- a/src/framework/domain/value_object.py
+++ b/src/framework/domain/value_object.py
@@ -1,4 +1,5 @@
 import uuid
+from functools import partial
 from urllib.parse import urlsplit, SplitResult
 from dataclasses import dataclass
 from functools import total_ordering
@@ -6,10 +7,11 @@
 
 from .rule import Rule, BussinessAssertionExtension
 
-__all__ = ["UUID", "UUIDv4", "ValueObject", "Money", "URL"]
+__all__ = ["UUID", "UUIDv4", "UUIDv5", "ValueObject", "Money", "URL"]
 
 UUID = uuid.UUID
 UUIDv4 = uuid.uuid4
+UUIDv5 = partial(uuid.uuid5, uuid.NAMESPACE_URL)

From 293221ab7edb9e4569bc53e98aa3ea78d22e12b1 Mon Sep 17 00:00:00 2001
From: Wesley Vitor
Date: Thu, 2 Mar 2023 01:30:39 -0300
Subject: [PATCH 6/7] move ScraperOrchestration/ from infrastructure to
 application. #8
---
 .../ScraperOrchestration/Wrapper.py              | 0
 .../ScraperOrchestration/category_URL_manager.py | 0
 .../ScraperOrchestration/orchestrator.py         | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename src/Scraper/{infrastructure => application}/ScraperOrchestration/Wrapper.py (100%)
 rename src/Scraper/{infrastructure => application}/ScraperOrchestration/category_URL_manager.py (100%)
 rename src/Scraper/{infrastructure => application}/ScraperOrchestration/orchestrator.py (100%)

diff --git a/src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py b/src/Scraper/application/ScraperOrchestration/Wrapper.py
similarity index 100%
rename from src/Scraper/infrastructure/ScraperOrchestration/Wrapper.py
rename to src/Scraper/application/ScraperOrchestration/Wrapper.py
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py b/src/Scraper/application/ScraperOrchestration/category_URL_manager.py
similarity index 100%
rename from src/Scraper/infrastructure/ScraperOrchestration/category_URL_manager.py
rename to src/Scraper/application/ScraperOrchestration/category_URL_manager.py
diff --git a/src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py b/src/Scraper/application/ScraperOrchestration/orchestrator.py
similarity index 100%
rename from src/Scraper/infrastructure/ScraperOrchestration/orchestrator.py
rename to src/Scraper/application/ScraperOrchestration/orchestrator.py

From 5b9097ed7606faa0e2e9fa404bff8ae54499d516 Mon Sep 17 00:00:00 2001
From: Wesley Vitor
Date: Thu, 2 Mar 2023 01:36:17 -0300
Subject: [PATCH 7/7] small changes. #8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/Scraper/domain/category_url.py      | 14 --------------
 src/Scraper/domain/entity.py            | 21 +++++++++++++--------
 src/SearchEngine/domain/repositories.py |  1 -
 3 files changed, 13 insertions(+), 23 deletions(-)
 delete mode 100644 src/Scraper/domain/category_url.py

diff --git a/src/Scraper/domain/category_url.py b/src/Scraper/domain/category_url.py
deleted file mode 100644
index a1cc6ac..0000000
--- a/src/Scraper/domain/category_url.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from dataclasses import dataclass
-
-from framework.domain.entity import Entity
-from framework.domain.value_object import URL
-from framework.domain.components import EComponentType
-
-
-@dataclass
-class CategoryURL(Entity):
-    url: URL
-    category: EComponentType
-
-    def __hash__(self):
-        return hash(self.uid)
diff --git a/src/Scraper/domain/entity.py b/src/Scraper/domain/entity.py
index 0433dc3..a1cc6ac 100644
--- a/src/Scraper/domain/entity.py
+++ b/src/Scraper/domain/entity.py
@@ -1,9 +1,14 @@
+from dataclasses import dataclass
+
+from framework.domain.entity import Entity
+from framework.domain.value_object import URL
+from framework.domain.components import EComponentType
+
+
 @dataclass
-class MatchesTrackedComponent(Rule):
-    # Checks whether the component is one of the tracked components
-    component_name: str
-
-    def is_broken(self) -> bool:
-        # check whether the component exists
-        # return not SearchEngine.get_id_by_name(component_name)
-        return False
+class CategoryURL(Entity):
+    url: URL
+    category: EComponentType
+
+    def __hash__(self):
+        return hash(self.uid)
diff --git a/src/SearchEngine/domain/repositories.py b/src/SearchEngine/domain/repositories.py
index f60315a..5addb34 100644
--- a/src/SearchEngine/domain/repositories.py
+++ b/src/SearchEngine/domain/repositories.py
@@ -6,7 +6,6 @@
 from framework.domain.repository import AbstractRepository
 from framework.domain.exception import DomainException
 from framework.domain.components import Component
-from Scraper.domain.aggragate import VolatileData
 
 
 @dataclass
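
Read as a whole, the series converges on this flow: the orchestrator asks CategoryURLManager for the distinct (scheme, domain) pairs, starts one Wrapper task per domain, and each task pages through its category URLs, keying every offer by a UUIDv5 of its URL so a later scrape of the same page updates rows in place and can detect price drops. The following is a minimal, self-contained sketch of that flow; the in-memory repository, the hard-coded pages, and all names in it are illustrative stand-ins for the SQLAlchemy repository and the Kabum scraper, not code from the repository.

import asyncio
import uuid
from random import uniform


def uid_for(url: str) -> uuid.UUID:
    # Mirrors UUIDv5 = partial(uuid.uuid5, uuid.NAMESPACE_URL) from patch 5:
    # the same URL always yields the same key, so a re-scrape updates the
    # existing row instead of inserting a duplicate.
    return uuid.uuid5(uuid.NAMESPACE_URL, url)


class InMemoryVolatileData:
    """Upsert semantics of SQLAlchemyVolatileData._add, without the database."""

    def __init__(self):
        self.rows = {}

    def add(self, url: str, cost: float, availability: bool):
        key = uid_for(url)
        previous = self.rows.get(key)
        # Same price-drop test as patch 5: new cost more than 0.1 below the
        # stored cost, and the offer is actually available.
        if previous and availability and cost + 0.1 < previous["cost"]:
            print(f"price drop on {url}: {previous['cost']} -> {cost}")
        self.rows[key] = {"url": url, "cost": cost, "availability": availability}


async def run_domain(domain: str, pages, repo: InMemoryVolatileData):
    # One task per store domain, like Wrapper.run_scraping: walk the paginated
    # category listing, sleeping a random interval between page requests so the
    # store is not hammered (interval shortened here for the demo).
    for page in pages:
        for url, _name, cost, availability in page:
            repo.add(url, cost, availability)
        await asyncio.sleep(uniform(0.0, 0.01))


async def main():
    repo = InMemoryVolatileData()
    pages = [
        [("https://store.example/gpu-x", "GPU X", 1500.0, True)],
        [("https://store.example/gpu-x", "GPU X", 1400.0, True)],
    ]
    # asyncio.gather mirrors the orchestrator fanning out one task per domain.
    await asyncio.gather(run_domain("store.example", pages, repo))


asyncio.run(main())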