Skip to content

Commit

Permalink
(#29)
Browse files Browse the repository at this point in the history
* Improve `Article` __repr__ method
* Improve logs
* Modify attribute names in `Cluster`
* Add function `create_new_cluster` to `ClusterUtils`
* Improve `get_similar_articles` route
  • Loading branch information
kggold4 committed Jun 7, 2023
1 parent bab2b5e commit 2611046
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 21 deletions.
2 changes: 1 addition & 1 deletion api/server_api/api_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get_similar_articles_data(self, article_url: str) -> List[ArticleApiData]:
# collect needed articles data
self.server_logger.info(f"Got {len(articles)} similar articles")
for index, article in enumerate(articles):
self.server_logger.debug(f"({index + 1}) article data: `{article.convert_to_dict()}`")
self.server_logger.debug(f"({index + 1}) Article data: `{str(article)}`")
article_api_object = self.__get_convert_article_api_data(article=article)
similar_articles.append(article_api_object)
except Exception as e:
Expand Down
33 changes: 20 additions & 13 deletions api/server_api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from api.server_api import app
from api.server_api.api_logic import APILogic
from api.server_api.exceptions import ArticleNotFoundException, NoSimilarArticlesException, GetSimilarArticlesException
from logger import get_current_logger


@app.route('/')
Expand All @@ -13,20 +14,26 @@ def index():

@app.route('/get_similar_articles', methods=['POST'])
def get_similar_articles():
    """Return the articles similar to the one given by the `url` request argument.

    The response is always a dict of the form
    ``{"articles_data": list, "error_msg": str, "succeeded": bool}``.
    A missing `url` argument and the three known lookup exceptions are
    reported through ``error_msg`` instead of being raised; any other
    exception propagates to the caller.
    """
    logger = get_current_logger()
    logger.debug("Try getting similar articles")
    return_data = {"articles_data": list(), "error_msg": "", "succeeded": False}
    if 'url' not in request.args:
        return_data["error_msg"] = "url required"
        # Call the logger directly, like every other log call in this function
        logger.warning("Didn't get the current url of the article")
    else:
        try:
            api_logic = APILogic()
            article_url: str = request.args['url']
            similar_articles_data = api_logic.get_similar_articles_data(article_url=article_url)
            return_data["articles_data"] = similar_articles_data
            return_data["succeeded"] = True
            logger.info(f"Got {len(similar_articles_data)} similar articles")
        except ArticleNotFoundException:
            return_data["error_msg"] = "article not found in db"
        except NoSimilarArticlesException:
            return_data["error_msg"] = "no similar articles found"
        except GetSimilarArticlesException:
            return_data["error_msg"] = "error getting similar articles"

    # Single exit point so the final payload is logged on every path
    logger.info(f"(get_similar_articles) return data: `{return_data}`")
    return return_data
4 changes: 1 addition & 3 deletions db_driver/db_objects/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ class Article:
images: Optional[List[str]] = None

def __repr__(self) -> str:
    """Return a compact one-line summary showing only domain, url and title."""
    # The other attributes are deliberately left out to keep log lines short
    return f'(domain: `{self.domain}`, url: `{self.url}`, title: `{self.title}`)'

def convert_to_dict(self) -> dict:
Expand Down
4 changes: 2 additions & 2 deletions db_driver/db_objects/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ class Cluster:
main_article_id: str
creation_time: datetime
last_updated: datetime
# Domains of the articles grouped in this cluster
domains: list[str]
# `ClusterUtils.create_new_cluster` stores a list of category names here
categories: Optional[list[str]] = None

def convert_to_dict(self) -> dict:
    """Return this object's dataclass fields as a plain dict, built with `asdict`."""
    fields_as_dict = asdict(self)
    return fields_as_dict
1 change: 1 addition & 0 deletions server_utils/db_utils/article_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def get_article(self, article_url: str) -> Union[Article, None]:
article = article_object
except DataNotFoundDBException as e:
self.logger.warning(f"Error get article by article url: `{article_url}` - {str(e)}")
self.logger.info(f"Got article from db, article_id: `{article.article_id}`, url: `{article.url}`")
return article

def get_articles(self, articles_id: List[str]) -> List[Article]:
Expand Down
40 changes: 38 additions & 2 deletions server_utils/db_utils/cluster_utils.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,53 @@
import uuid
from datetime import datetime
from typing import List, Optional

from db_driver import get_current_db_driver
from db_driver.db_objects.article import Article
from db_driver.db_objects.cluster import Cluster
from db_driver.db_objects.db_objects_utils import get_db_object_from_dict
from db_driver.utils.consts import DBConsts
# from logger import get_current_logger
from logger import get_current_logger, log_function
from server_utils.db_utils.article_utils import ArticleUtils


class ClusterUtils:
    """Helpers for creating and fetching `Cluster` records through the current DB driver."""

    def __init__(self):
        self.logger = get_current_logger()
        self._db = get_current_db_driver()

    @log_function
    def create_new_cluster(self, article: Article,
                           classified_categories: Optional[List[str]] = None) -> Optional[str]:
        """Create a cluster containing only `article` and insert it into the clusters table.

        :param article: becomes both the main article and the only member of the cluster
        :param classified_categories: category names for the cluster; an empty list is
            stored when this is omitted or empty
        :return: whatever the DB driver's `insert_one` returns for the new record, or
            None when creation or insertion fails (the error is logged, not raised)
        """
        try:
            current_time = datetime.now()
            categories = classified_categories if classified_categories else list()
            cluster_data = {
                "cluster_id": str(uuid.uuid4()),
                "articles_id": [article.article_id],
                "main_article_id": article.article_id,
                # a new cluster starts with identical creation and update timestamps
                "creation_time": current_time,
                "last_updated": current_time,
                "domains": [article.domain],
                "categories": categories,
            }
            cluster: Cluster = Cluster(**cluster_data)
            _id = self._db.insert_one(table_name=DBConsts.CLUSTERS_TABLE_NAME, data=cluster.convert_to_dict())
            self.logger.info(f"Inserted cluster inserted_id: `{_id}`, cluster_id: `{cluster.cluster_id}`")
            return _id
        except Exception as e:
            # Best effort: log with traceback and signal the failure with an explicit None
            desc = f"Error insert cluster - {str(e)}"
            self.logger.exception(desc)
            return None

    def get_cluster(self, cluster_id: str) -> Cluster:
        """Fetch the record whose `cluster_id` matches and build a `Cluster` from it.

        NOTE(review): nothing here handles a missing record; presumably the driver's
        `get_one` raises in that case - confirm against the DB driver.
        """
        data_filter = {"cluster_id": cluster_id}
        cluster_data = self._db.get_one(table_name=DBConsts.CLUSTERS_TABLE_NAME, data_filter=data_filter)
        cluster_object: Cluster = get_db_object_from_dict(object_dict=cluster_data, class_instance=Cluster)
        return cluster_object


# For debug: create a cluster from one known article when this module is run directly
if __name__ == '__main__':
    article_utils = ArticleUtils()
    new_article = article_utils.get_article(article_url="https://www.bbc.com/news/world-europe-65471904")
    # `get_article` is declared to return `Union[Article, None]`, so stop early on None
    # rather than letting `create_new_cluster` fail on a missing attribute
    if new_article is None:
        raise SystemExit("Debug article was not found in db, cluster was not created")
    cluster_utils = ClusterUtils()
    cluster_utils.create_new_cluster(article=new_article, classified_categories=["finance", "bitcoin"])

0 comments on commit 2611046

Please sign in to comment.