From 15b2c3a38a4883d1fca9cd04e7c2e8aca9c20afa Mon Sep 17 00:00:00 2001 From: kggold4 Date: Thu, 8 Jun 2023 09:23:08 +0300 Subject: [PATCH] (#29) * Split `get_article` function into `get_article_by_id` and `get_article_by_url` * Fix `get_many` function in `mongodb_driver` --- api/server_api/api_logic.py | 2 +- db_driver/mongodb_driver.py | 2 +- server_utils/db_utils/article_utils.py | 15 ++++++++++++++- server_utils/db_utils/cluster_utils.py | 2 +- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/api/server_api/api_logic.py b/api/server_api/api_logic.py index f785c9d..1665833 100644 --- a/api/server_api/api_logic.py +++ b/api/server_api/api_logic.py @@ -25,7 +25,7 @@ def get_similar_articles_data(self, article_url: str) -> List[ArticleApiData]: """ self.server_logger.debug(f"Checking similar articles to article -> `{article_url}`") similar_articles: List[ArticleApiData] = list() - article_object: Article = self._article_utils.get_article(article_url=article_url) + article_object: Article = self._article_utils.get_article_by_url(article_url=article_url) if not article_object: desc = f"Didn't find article in db with article url: `{article_url}`" diff --git a/db_driver/mongodb_driver.py b/db_driver/mongodb_driver.py index db66a20..704cce8 100644 --- a/db_driver/mongodb_driver.py +++ b/db_driver/mongodb_driver.py @@ -194,7 +194,7 @@ def get_many(self, table_name: str, data_filter: dict) -> List[dict]: if res: object_id = res.cursor_id self.logger.info(f"Got data from db: '{self.DB_NAME}', table_name: '{table_name}', id: '{object_id}'") - return list(dict(res)) + return list(res) else: desc = f"Error find data with filter: {data_filter}, table: '{table_name}', db: '{self.DB_NAME}'" self.logger.warning(desc) diff --git a/server_utils/db_utils/article_utils.py b/server_utils/db_utils/article_utils.py index 19d8813..9eebf55 100644 --- a/server_utils/db_utils/article_utils.py +++ b/server_utils/db_utils/article_utils.py @@ -57,7 +57,19 @@ def
get_unclassified_article(self, required_filter_data: dict = None, get_random return Article(**article) - def get_article(self, article_url: str) -> Union[Article, None]: + def get_article_by_id(self, article_id: str) -> Union[Article, None]: + article = None + data_filter = {"article_id": article_id} + try: + article_data = self._db.get_one(table_name=DBConsts.ARTICLE_TABLE_NAME, data_filter=data_filter) + article_object: Article = get_db_object_from_dict(object_dict=article_data, class_instance=Article) + article = article_object + except DataNotFoundDBException as e: + self.logger.warning(f"Error get article by article id: `{article_id}` - {str(e)}") + self.logger.info(f"Got article from db, article_id: `{article.article_id}`, url: `{article.url}`") + return article + + def get_article_by_url(self, article_url: str) -> Union[Article, None]: article = None data_filter = {"url": article_url} try: @@ -70,6 +82,7 @@ def get_article(self, article_url: str) -> Union[Article, None]: return article def get_articles(self, articles_id: List[str]) -> List[Article]: + # todo: separate this function to: get_articles_by_ids and get_articles_by_urls articles: List[Article] = [] data_filter = {"article_id": {"$in": articles_id}} articles_data = self._db.get_many(table_name=DBConsts.ARTICLE_TABLE_NAME, data_filter=data_filter) diff --git a/server_utils/db_utils/cluster_utils.py b/server_utils/db_utils/cluster_utils.py index 8d21262..3acb931 100644 --- a/server_utils/db_utils/cluster_utils.py +++ b/server_utils/db_utils/cluster_utils.py @@ -48,6 +48,6 @@ def get_cluster(self, cluster_id: str) -> Cluster: # For debug if __name__ == '__main__': article_utils = ArticleUtils() - new_article: Article = article_utils.get_article(article_url="https://www.bbc.com/news/world-europe-65471904") + new_article: Article = article_utils.get_article_by_url(article_url="https://www.bbc.com/news/world-europe-65471904") cluster_utils = ClusterUtils() 
cluster_utils.create_new_cluster(article=new_article, classified_categories=["finance", "bitcoin"])