From b8901abaf9461316fce916a78e679acf24ae5e8a Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 17 May 2024 11:27:57 -0300 Subject: [PATCH 01/19] Coloca _get_user em um lugar comum aos aplicativos --- core/utils/get_user.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 core/utils/get_user.py diff --git a/core/utils/get_user.py b/core/utils/get_user.py new file mode 100644 index 00000000..8b101694 --- /dev/null +++ b/core/utils/get_user.py @@ -0,0 +1,13 @@ +from django.contrib.auth import get_user_model + + +User = get_user_model() + +def _get_user(request, username=None, user_id=None): + try: + return User.objects.get(pk=request.user.id) + except AttributeError: + if user_id: + return User.objects.get(pk=user_id) + if username: + return User.objects.get(username=username) \ No newline at end of file From 490530f6865c7e92868a515cf23da3dc626b8509 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 17 May 2024 11:30:30 -0300 Subject: [PATCH 02/19] realiza mudanca do _get_user --- bigbang/tasks.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/bigbang/tasks.py b/bigbang/tasks.py index a45a2de4..3ed0d6ee 100644 --- a/bigbang/tasks.py +++ b/bigbang/tasks.py @@ -1,19 +1,11 @@ import logging -from django.contrib.auth import get_user_model +from core.utils.get_user import _get_user from bigbang import tasks_scheduler from bigbang.setup import setup from config import celery_app -User = get_user_model() - - -def _get_user(user_id, username): - if user_id: - return User.objects.get(pk=user_id) - if username: - return User.objects.get(username=username) @celery_app.task(bind=True) @@ -36,7 +28,7 @@ def task_setup( file_path=None, config=None, ): - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) if file_path or config: setup(user, file_path, config) From 793a84e04c4c18d7ecd0c4b7ea9382d26f545c6f Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 17 May 2024 11:32:46 -0300 Subject: [PATCH 03/19] realiza mudanca do _get_user --- migration/tasks.py | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/migration/tasks.py b/migration/tasks.py index e044a265..04bc97fe 100644 --- a/migration/tasks.py +++ b/migration/tasks.py @@ -1,10 +1,10 @@ import logging import sys -from django.contrib.auth import get_user_model from django.utils.translation import gettext_lazy as _ from collection.models import Collection +from core.utils.get_user import _get_user from config import celery_app from htmlxml.models import HTMLXML from proc.models import ArticleProc, IssueProc, JournalProc @@ -12,27 +12,6 @@ from . import controller -User = get_user_model() - - -def _get_user(user_id, username): - try: - if user_id: - return User.objects.get(pk=user_id) - if username: - return User.objects.get(username=username) - except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - UnexpectedEvent.create( - e=e, - exc_traceback=exc_traceback, - detail={ - "task": "migration.tasks._get_user", - "user_id": user_id, - "username": username, - }, - ) - def _get_collections(collection_acron): try: @@ -126,7 +105,7 @@ def task_migrate_title_record( Cria um registro MigratedData (source="journal") """ try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) collection = Collection.get(acron=collection_acron) JournalProc.register_classic_website_data( user, @@ -227,7 +206,7 @@ def task_migrate_issue_record( Cria um registro MigratedData (source="issue") """ try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) collection = Collection.get(acron=collection_acron) IssueProc.register_classic_website_data( @@ -305,7 +284,7 @@ def task_import_one_issue_files( force_update=False, ): try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) item = IssueProc.objects.get(pk=item_id) item.get_files_from_classic_website( user, force_update, controller.import_one_issue_files @@ -379,7 +358,7 @@ def task_import_one_issue_document_records( Cria ou atualiza os registros de ArticleProc """ try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) item = IssueProc.objects.get(pk=item_id) item.get_article_records_from_classic_website( user, force_update, controller.get_article_records_from_classic_website @@ -446,7 +425,7 @@ def task_get_xml( body_and_back_xml=None, ): try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) item = ArticleProc.objects.get(pk=item_id) try: From 48118d42df035d0f8fad5305bd5b24170b2ed56c Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 17 May 2024 11:33:06 -0300 Subject: [PATCH 04/19] realiza mudanca do _get_user --- pid_provider/tasks.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/pid_provider/tasks.py b/pid_provider/tasks.py index ed320246..56e5093f 100644 --- a/pid_provider/tasks.py +++ b/pid_provider/tasks.py @@ -1,25 +1,13 @@ import logging -from django.contrib.auth import get_user_model from config import celery_app +from core.utils.get_user import _get_user from pid_provider.provider import PidProvider from pid_provider.requester import PidRequester from proc.models import ArticleProc -User = get_user_model() - - -def _get_user(request, username=None, user_id=None): - try: - return User.objects.get(pk=request.user.id) - except AttributeError: - if user_id: - return User.objects.get(pk=user_id) - if username: - return User.objects.get(username=username) - @celery_app.task(bind=True, name="provide_pid_for_file") def provide_pid_for_file( From 783ae05c9db86cfe71b5b8113bb338140ba8c520 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 17 May 2024 11:33:26 -0300 Subject: [PATCH 05/19] realiza mudanca do _get_user --- proc/tasks.py | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/proc/tasks.py b/proc/tasks.py index 034fb6fb..ecee3ce8 100644 --- a/proc/tasks.py +++ b/proc/tasks.py @@ -5,33 +5,13 @@ from django.utils.translation import gettext_lazy as _ from collection.models import Collection +from core.utils.get_user import _get_user from config import celery_app from proc.models import ArticleProc, IssueProc, JournalProc from tracker.models import UnexpectedEvent from . import controller -User = get_user_model() - - -def _get_user(user_id, username): - try: - if user_id: - return User.objects.get(pk=user_id) - if username: - return User.objects.get(username=username) - except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - UnexpectedEvent.create( - e=e, - exc_traceback=exc_traceback, - detail={ - "task": "proc.tasks._get_user", - "user_id": user_id, - "username": username, - }, - ) - def _get_collections(collection_acron): try: @@ -133,7 +113,7 @@ def task_create_or_update_journal( """ try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) item = JournalProc.objects.get(pk=item_id) item.create_or_update_item( user, force_update, controller.create_or_update_journal @@ -230,7 +210,7 @@ def task_create_or_update_issue( """ try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) item = IssueProc.objects.get(pk=item_id) item.create_or_update_item( user, force_update, controller.create_or_update_issue @@ -315,7 +295,7 @@ def task_generate_sps_package( force_core_update=None, ): try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) item = ArticleProc.objects.get(pk=item_id) if force_core_update and item.sps_pkg: item.sps_pkg.set_registered_in_core(False) @@ -416,7 +396,7 @@ def task_create_or_update_article( """ try: - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) item = ArticleProc.objects.get(pk=item_id) item.create_or_update_item( user, force_update, controller.create_or_update_article @@ -474,6 +454,6 @@ def subtask_synchronize_to_pid_provider( user_id=None, item_id=None ): - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) item = ArticleProc.objects.get(pk=item_id) item.synchronize(user) From 0e7779d43f8cccba73ea88e97fb76369f11a99bf Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 17 May 2024 11:34:01 -0300 Subject: [PATCH 06/19] realiza mudanca do _get_user e remove _get_user em task_publish_model_inline inutilizavel --- publication/tasks.py | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/publication/tasks.py b/publication/tasks.py index cfab76fc..f7c6dcce 100644 --- a/publication/tasks.py +++ b/publication/tasks.py @@ -1,7 +1,6 @@ import logging import sys -from django.contrib.auth import get_user_model from django.utils.translation import gettext_lazy as _ from collection.choices import QA @@ -9,6 +8,7 @@ from config import celery_app from proc.models import ArticleProc, IssueProc, JournalProc from core.models import PressRelease +from core.utils.get_user import _get_user from publication.api.document import publish_article from publication.api.issue import publish_issue from publication.api.journal import publish_journal @@ -16,7 +16,6 @@ from publication.api.publication import PublicationAPI from tracker.models import UnexpectedEvent -User = get_user_model() SCIELO_MODELS = { "journal": JournalProc, @@ -33,25 +32,6 @@ } -def _get_user(user_id, username): - try: - if user_id: - return User.objects.get(pk=user_id) - if username: - return User.objects.get(username=username) - except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - UnexpectedEvent.create( - e=e, - exc_traceback=exc_traceback, - detail={ - "task": "migration.tasks._get_user", - "user_id": user_id, - "username": username, - }, - ) - - def _get_collections(collection_acron): try: if collection_acron: @@ -220,7 +200,7 @@ def task_publish_model( website_kind = website_kind or QA model_name = model_name or "article" collection = Collection.get(acron=collection_acron) - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) SciELOModel = SCIELO_MODELS.get(model_name) @@ -253,7 +233,7 @@ def task_publish_item( ): try: item = None - user = _get_user(user_id, username) + user = _get_user(self.request, user_id, username) SciELOModel = SCIELO_MODELS.get(model_name) item = SciELOModel.objects.get(pk=item_id) item.publish(user, PUBLISH_FUNCTIONS.get(model_name), website_kind, api_data) @@ -298,7 +278,6 @@ def task_publish_model_inline( ): website_kind = website_kind or QA collection = Collection.get(acron=collection_acron) - user = _get_user(user_id, username) website = WebSiteConfiguration.get( collection=collection, From 5582b74f892da3a7b63f6d2a4ab19f7c811ed4b9 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 17 May 2024 11:34:15 -0300 Subject: [PATCH 07/19] realiza mudanca do _get_user --- upload/tasks.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/upload/tasks.py b/upload/tasks.py index 50797046..087a38f2 100644 --- a/upload/tasks.py +++ b/upload/tasks.py @@ -1,7 +1,6 @@ import json from celery.result import AsyncResult -from django.contrib.auth import get_user_model from django.utils.translation import gettext as _ from packtools.sps import exceptions as sps_exceptions from packtools.sps.models import package as sps_package @@ -13,6 +12,7 @@ from article.choices import AS_CHANGE_SUBMITTED from article.controller import create_article_from_etree, update_article from article.models import Article +from core.utils.get_user import _get_user from config import celery_app from issue.models import Issue from journal.controller import get_journal_dict_for_validation @@ -23,9 +23,6 @@ from upload.models import Package -User = get_user_model() - - def run_validations( filename, package_id, package_category, article_id=None, issue_id=None ): @@ -528,13 +525,6 @@ def task_get_or_create_package(pid_v3, user_id): ).id -def _get_user(request, user_id): - try: - return User.objects.get(pk=request.user.id) - except AttributeError: - return User.objects.get(pk=user_id) - - @celery_app.task(bind=True, name="request_pid_for_accepted_packages") def task_request_pid_for_accepted_packages(self, user_id): user = _get_user(self.request, user_id) From 91a5b6c9140457146f793ed382b74875725beb4b Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Tue, 21 May 2024 14:41:46 -0300 Subject: [PATCH 08/19] Cria os modelos e seus metodos de classe (CheckArticleAvailability, ScieloSiteStatus) --- article/models.py | 136 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/article/models.py b/article/models.py index 2c59a1f1..02772564 100644 --- a/article/models.py +++ b/article/models.py @@ -1,4 +1,5 @@ import logging +import datetime from django.contrib.auth import get_user_model from django.db import models @@ -314,3 +315,138 @@ def __str__(self) -> str: return f"{self.article or self.pid_v3} - {self.deadline}" base_form_class = RequestArticleChangeForm + + +class CheckArticleAvailability(CommonControlField): + """ + Modelo para armazenar o status de disponibilidade nos sites, + tanto na nova versao, quanto na antiga, do scielo.br. + """ + article = models.ForeignKey( + Article, + on_delete=models.SET_NULL, + null=True, + ) + site_status = models.ManyToManyField( + "ScieloSiteStatus" + ) + + def __str__(self): + return f"{self.article.pid_v3}" + + @classmethod + def get(cls, article): + return cls.objects.get(article=article) + + def create_or_update_scielo_site_status( + self, + url, + status, + user + ): + obj = ScieloSiteStatus.create_or_update( + url=url, + status=status, + user=user, + ) + self.site_status.add(obj) + self.save() + + + @classmethod + def create( + cls, + article, + status, + url, + user, + + ): + obj = cls( + article=article, + creator=user, + ) + obj.save() + obj.create_or_update_scielo_site_status( + url=url, + status=status, + user=user, + ) + return obj + + @classmethod + def create_or_update(cls, + article, + status, + url, + user, + ): + try: + obj = cls.get(article=article) + obj.create_or_update_scielo_site_status( + url=url, + status=status, + user=user, + ) + return obj + except cls.DoesNotExist: + cls.create( + article=article, + status=status, + url=url, + user=user + ) + +class ScieloSiteStatus(CommonControlField): + check_date = models.DateTimeField(null=True, blank=True) + url_site_scielo = models.SlugField(max_length=500, unique=True) + available = models.BooleanField(default=False) + + def update( + self, + status, + ): + self.check_date = datetime.datetime.now() + self.available = status + self.save() + return self + + + @classmethod + def get(cls, url): + return cls.objects.get(url_site_scielo=url) + + + @classmethod + def create( + cls, + url, + status, + user, + ): + obj = cls( + check_date=datetime.datetime.now(), + url_site_scielo=url, + available=status, + creator=user + ) + obj.save() + return obj + + @classmethod + def create_or_update( + cls, + url, + status, + user, + ): + try: + obj = cls.get(url=url) + obj.update(status=status) + return obj + except cls.DoesNotExist: + return cls.create( + url=url, + status=status, + user=user, + ) \ No newline at end of file From 4a6b2e82cf5894e774200e5310d37008472915b8 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Tue, 21 May 2024 14:46:58 -0300 Subject: [PATCH 09/19] Insere o paramentro date nos metodos das classes CheckArticleAvailability e ScieloSiteStatus --- article/models.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/article/models.py b/article/models.py index 02772564..f276f543 100644 --- a/article/models.py +++ b/article/models.py @@ -342,11 +342,13 @@ def create_or_update_scielo_site_status( self, url, status, - user + user, + date=None, ): obj = ScieloSiteStatus.create_or_update( url=url, status=status, + date=date, user=user, ) self.site_status.add(obj) @@ -360,7 +362,7 @@ def create( status, url, user, - + date=None, ): obj = cls( article=article, @@ -371,6 +373,7 @@ def create( url=url, status=status, user=user, + date=date, ) return obj @@ -379,13 +382,15 @@ def create_or_update(cls, article, status, url, - user, + user, + date=None, ): try: obj = cls.get(article=article) obj.create_or_update_scielo_site_status( url=url, status=status, + date=date, user=user, ) return obj @@ -394,6 +399,7 @@ def create_or_update(cls, article=article, status=status, url=url, + date=date, user=user ) @@ -405,8 +411,9 @@ class ScieloSiteStatus(CommonControlField): def update( self, status, + date=None, ): - self.check_date = datetime.datetime.now() + self.check_date = date or datetime.datetime.now() self.available = status self.save() return self @@ -423,9 +430,11 @@ def create( url, status, user, + date=None, ): + date = date or datetime.datetime.now() obj = cls( - check_date=datetime.datetime.now(), + check_date=date, url_site_scielo=url, available=status, creator=user @@ -439,14 +448,19 @@ def create_or_update( url, status, user, + date=None, ): try: obj = cls.get(url=url) - obj.update(status=status) + obj.update( + status=status, + date=date + ) return obj except cls.DoesNotExist: return cls.create( url=url, status=status, user=user, + date=date ) \ No newline at end of file From 4db2b20a40f61c3607befea9a0ddeabc8cccba07 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Tue, 21 May 2024 14:52:39 -0300 Subject: [PATCH 10/19] Cria tasks que realizam a verificacao da disponibilidade do artigo no site --- article/tasks.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 article/tasks.py diff --git a/article/tasks.py b/article/tasks.py new file mode 100644 index 00000000..76d9750c --- /dev/null +++ b/article/tasks.py @@ -0,0 +1,89 @@ +import sys +from django.db.models import Q + +from config import celery_app +from core.utils.get_user import _get_user +from core.utils.requester import fetch_data +from article.models import CheckArticleAvailability, Article +from collection.models import Collection +from tracker.models import UnexpectedEvent + +@celery_app.task(bind=True) +def initiate_article_availability_check( + self, + username, + user_id, + issn_print=None, + issn_electronic=None, + publication_year=None, + updated=None, + article_pid_v3=None, + collection_acron=None, +): + if collection_acron: + collection = Collection.objects.filter(acron=collection_acron) + else: + collection = Collection.objects.all() + + query = Q(journal__journalproc__collection__in=collection) + if not updated: + if article_pid_v3: + query |= Q(pid_v3=article_pid_v3) + if issn_print: + query |= Q(journal__official_journal__issn_print=issn_print) + if issn_electronic: + query |= Q(journal__official_journal__issn_electronic=issn_electronic) + if publication_year: + query |= Q(issue__publication_year=publication_year) + + articles = Article.objects.filter(query) + + for article in articles.iterator(): + for article_per_lang in ['pt', 'en']: + process_article_availability.apply_async( + kwargs=dict( + user_id=user_id, + username=username, + pid_v3=article.pid_v3, + journal_acron=article.journal.journal_acron, + lang=article_per_lang, + ) + ) + + +@celery_app.task(bind=True) +def process_article_availability(self, user_id, username, pid_v3, journal_acron, lang): + urls = [ + f"https://www.scielo.br/scielo.php?script=sci_arttext&pid={pid_v3}&lng={lang}&nrm=iso", + f"https://www.scielo.br/j/{journal_acron}/a/{pid_v3}/?lang={lang}" + ] + try: + user = _get_user(self.request, user_id=user_id, username=username) + article = Article.objects.get(pid_v3=pid_v3) + + for url in urls: + try: + response = fetch_data(url, timeout=2, verify=True) + CheckArticleAvailability.create_or_update( + article=article, + status=True, + url=url, + user=user, + ) + except Exception as e: + CheckArticleAvailability.create_or_update( + article=article, + status=False, + url=url, + user=user, + ) + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + e=e, + exc_traceback=exc_traceback, + detail={ + "function": "article.tasks.process_article_availability", + "urls": urls + }, + ) \ No newline at end of file From 627c144578d32968805d3d6c183bdfa380e18750 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Tue, 21 May 2024 14:52:55 -0300 Subject: [PATCH 11/19] Cria ScieloSiteStatusAdmin --- article/wagtail_hooks.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/article/wagtail_hooks.py b/article/wagtail_hooks.py index 4e28b5fa..1c2803b4 100644 --- a/article/wagtail_hooks.py +++ b/article/wagtail_hooks.py @@ -14,7 +14,7 @@ from config.menu import get_menu_order from .button_helper import ArticleButtonHelper, RequestArticleChangeButtonHelper -from .models import Article, RelatedItem, RequestArticleChange, choices +from .models import Article, RelatedItem, RequestArticleChange, choices, ScieloSiteStatus from .permission_helper import ArticlePermissionHelper # from upload import exceptions as upload_exceptions @@ -243,8 +243,33 @@ class ArticleModelAdminGroup(ModelAdminGroup): # RequestArticleChangeModelAdmin, ) +class ScieloSiteStatusAdmin(ModelAdmin): + model = ScieloSiteStatus + menu_label = "Scielo Site Status" + menu_icon = "doc-full" + list_display = ( + "article", + "url_site_scielo", + "available", + "check_date", + ) + search_fields= ( + "url_site_scielo", + "checkarticleavailability__article__pid_v3" + ) + menu_order = 200 + add_to_settings_menu = False + exclude_from_explorer = False + + def article(self, obj): + return list(obj.checkarticleavailability_set.all()) + + + def get_queryset(self, request): + return super().get_queryset(request).filter(available=False) + -# modeladmin_register(ArticleModelAdminGroup) +modeladmin_register(ScieloSiteStatusAdmin) modeladmin_register(ArticleModelAdmin) From 465f4a63fb414fa8ddb7eadc73b473cfa1ad98d9 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Tue, 21 May 2024 14:53:06 -0300 Subject: [PATCH 12/19] script para executar initiate_article_availability_check --- article/scripts/load_check_article.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 article/scripts/load_check_article.py diff --git a/article/scripts/load_check_article.py b/article/scripts/load_check_article.py new file mode 100644 index 00000000..98a15ae3 --- /dev/null +++ b/article/scripts/load_check_article.py @@ -0,0 +1,11 @@ +from article.tasks import initiate_article_availability_check + + +def run(pid_v3, username=None, user_id=None): + initiate_article_availability_check.apply_async( + kwargs=dict( + username=username, + user_id=user_id, + article_pid_v3=pid_v3, + ) + ) \ No newline at end of file From c47344ce4a66eee86d6cfb25420c2d81491598f9 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Tue, 21 May 2024 14:53:11 -0300 Subject: [PATCH 13/19] migracao --- ...ielositestatus_checkarticleavailability.py | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 article/migrations/0002_scielositestatus_checkarticleavailability.py diff --git a/article/migrations/0002_scielositestatus_checkarticleavailability.py b/article/migrations/0002_scielositestatus_checkarticleavailability.py new file mode 100644 index 00000000..a0510976 --- /dev/null +++ b/article/migrations/0002_scielositestatus_checkarticleavailability.py @@ -0,0 +1,129 @@ +# Generated by Django 5.0.3 on 2024-05-21 00:43 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("article", "0001_initial"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="ScieloSiteStatus", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ("check_date", models.DateTimeField(blank=True, null=True)), + ("url_site_scielo", models.SlugField(max_length=500, unique=True)), + ("available", models.BooleanField(default=False)), + ( + "creator", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="CheckArticleAvailability", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ( + "article", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="article.article", + ), + ), + ( + "creator", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ("site_status", models.ManyToManyField(to="article.scielositestatus")), + ], + options={ + "abstract": False, + }, + ), + ] From f3f615f3b87adec2cd6743ad16e5c27095c59ff0 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Tue, 21 May 2024 14:54:08 -0300 Subject: [PATCH 14/19] Corrige for em initiate_article_availability_check --- article/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/article/tasks.py b/article/tasks.py index 76d9750c..4a73e989 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -39,7 +39,7 @@ def initiate_article_availability_check( articles = Article.objects.filter(query) for article in articles.iterator(): - for article_per_lang in ['pt', 'en']: + for article_per_lang in article.doi_with_lang.lang: process_article_availability.apply_async( kwargs=dict( user_id=user_id, From 9bb5aeac74d5998b0a9d7995e1a828f5294cf7ab Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 23 May 2024 12:13:46 -0300 Subject: [PATCH 15/19] Cria Choices VERIFY_ARTICLE_TYPE E VERIFY_HTTP_ERROR_CODE --- article/choices.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/article/choices.py b/article/choices.py index 0b074cd1..a514341c 100644 --- a/article/choices.py +++ b/article/choices.py @@ -1,4 +1,5 @@ from django.utils.translation import gettext as _ +from core.utils.requester import NonRetryableError, RetryableError # Model RequestArticleChange, Field change_type RCT_UPDATE = "update" @@ -124,3 +125,13 @@ (AS_SCHEDULED_TO_PUBLISH, _("Scheduled to publish")), (AS_PUBLISHED, _("Published")), ) + +VERIFY_ARTICLE_TYPE = [ + ("TEXT", _("Text")), + ("PDF", _("pdf")), +] + +VERIFY_HTTP_ERROR_CODE = [ + (RetryableError, _("Excessively long response time. Retry later")), + (NonRetryableError, _("Site not found.")), +] \ No newline at end of file From 4bc73af2ba92d3f776a05f0f800fd0f9457847f8 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 23 May 2024 12:28:43 -0300 Subject: [PATCH 16/19] - Adiciona campo type e available em ScieloSiteStatus - Modifica o paramentro Status para exemplicar melhor o error - Adiciona paramentro available --- article/models.py | 66 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/article/models.py b/article/models.py index f276f543..a0a8c286 100644 --- a/article/models.py +++ b/article/models.py @@ -2,7 +2,7 @@ import datetime from django.contrib.auth import get_user_model -from django.db import models +from django.db import models, IntegrityError from django.utils.translation import gettext_lazy as _ from modelcluster.fields import ParentalKey from modelcluster.models import ClusterableModel @@ -30,6 +30,10 @@ User = get_user_model() +def verify_type_of_url(type): + return dict(choices.VERIFY_ARTICLE_TYPE).get("PDF") if type else dict(choices.VERIFY_ARTICLE_TYPE).get("TEXT") + + class Article(ClusterableModel, CommonControlField): """ No contexto de Upload, Article deve conter o mínimo de campos, @@ -326,6 +330,7 @@ class CheckArticleAvailability(CommonControlField): Article, on_delete=models.SET_NULL, null=True, + unique=True, ) site_status = models.ManyToManyField( "ScieloSiteStatus" @@ -342,12 +347,16 @@ def create_or_update_scielo_site_status( self, url, status, + type, + available, user, date=None, ): obj = ScieloSiteStatus.create_or_update( url=url, status=status, + type=type, + available=available, date=date, user=user, ) @@ -360,18 +369,25 @@ def create( cls, article, status, + available, url, + type, user, date=None, ): - obj = cls( - article=article, - creator=user, - ) - obj.save() + try: + obj = cls( + article=article, + creator=user, + ) + obj.save() + except IntegrityError: + obj = cls.get(article=article) obj.create_or_update_scielo_site_status( url=url, status=status, + type=type, + available=available, user=user, date=date, ) @@ -381,7 +397,9 @@ def create( def create_or_update(cls, article, status, + available, url, + type, user, date=None, ): @@ -390,6 +408,8 @@ def create_or_update(cls, obj.create_or_update_scielo_site_status( url=url, status=status, + type=type, + available=available, date=date, user=user, ) @@ -398,7 +418,9 @@ def create_or_update(cls, cls.create( article=article, status=status, + available=available, url=url, + type=type, date=date, user=user ) @@ -406,18 +428,36 @@ def create_or_update(cls, class ScieloSiteStatus(CommonControlField): check_date = models.DateTimeField(null=True, blank=True) url_site_scielo = models.SlugField(max_length=500, unique=True) + status = models.CharField( + max_length=80, + null=True, + blank=True + ) + type = models.CharField( + max_length=10, + choices=choices.VERIFY_ARTICLE_TYPE, + null=True, + blank=True, + ) available = models.BooleanField(default=False) def update( self, status, + type, + available, date=None, ): self.check_date = date or datetime.datetime.now() - self.available = status + self.status = status + self.available = available + self.type = verify_type_of_url(type) self.save() return self + class Meta: + verbose_name = "Scielo Site Status" + verbose_name_plural = "Scielo Site Status" @classmethod def get(cls, url): @@ -429,6 +469,8 @@ def create( cls, url, status, + type, + available, user, date=None, ): @@ -436,7 +478,9 @@ def create( obj = cls( check_date=date, url_site_scielo=url, - available=status, + status=status, + type=verify_type_of_url(type), + available=available, creator=user ) obj.save() @@ -447,6 +491,8 @@ def create_or_update( cls, url, status, + type, + available, user, date=None, ): @@ -454,6 +500,8 @@ def create_or_update( obj = cls.get(url=url) obj.update( status=status, + type=type, + available=available, date=date ) return obj @@ -461,6 +509,8 @@ def create_or_update( return cls.create( url=url, status=status, + type=type, + available=available, user=user, date=date ) \ No newline at end of file From 5f978ffcee152bb4da35cdfba715f1aecc1fd133 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 23 May 2024 12:30:18 -0300 Subject: [PATCH 17/19] - Adiciona bloco try-except em initiate_article_availability_check e process_article_availability - Melhora o registro das requisicoes em process_article_availability --- article/tasks.py | 72 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/article/tasks.py b/article/tasks.py index 4a73e989..3d6bfbe0 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -1,13 +1,17 @@ +import re import sys from django.db.models import Q +from django.utils.translation import gettext_lazy as _ +from article.choices import VERIFY_HTTP_ERROR_CODE from config import celery_app from core.utils.get_user import _get_user -from core.utils.requester import fetch_data +from core.utils.requester import fetch_data, NonRetryableError, RetryableError from article.models import CheckArticleAvailability, Article from collection.models import Collection from tracker.models import UnexpectedEvent + @celery_app.task(bind=True) def initiate_article_availability_check( self, @@ -38,25 +42,39 @@ def initiate_article_availability_check( articles = Article.objects.filter(query) - for article in articles.iterator(): - for article_per_lang in article.doi_with_lang.lang: - process_article_availability.apply_async( - kwargs=dict( - user_id=user_id, - username=username, - pid_v3=article.pid_v3, - journal_acron=article.journal.journal_acron, - lang=article_per_lang, + try: + for article in articles.iterator(): + for article_per_lang in article.doi_with_lang.lang: + process_article_availability.apply_async( + kwargs=dict( + user_id=user_id, + username=username, + pid_v3=article.pid_v3, + pid_v2=article.sps_pkg.articleproc_set.first().pid, + journal_acron=article.journal.journal_acron, + lang=article_per_lang, + domain=article.journal.journalproc_set.first().collection.websiteconfiguration_set.get(enabled=True).url, + ) ) - ) - + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + e=e, + exc_traceback=exc_traceback, + detail={ + "function": "article.tasks.initiate_article_availability_check", + }, + ) @celery_app.task(bind=True) -def process_article_availability(self, user_id, username, pid_v3, journal_acron, lang): +def process_article_availability(self, user_id, username, pid_v3, pid_v2, journal_acron, lang, domain,): urls = [ - f"https://www.scielo.br/scielo.php?script=sci_arttext&pid={pid_v3}&lng={lang}&nrm=iso", - f"https://www.scielo.br/j/{journal_acron}/a/{pid_v3}/?lang={lang}" + f"{domain}/scielo.php?script=sci_arttext&pid={pid_v2}&lang={lang}&nrm=iso", + f"{domain}/j/{journal_acron}/a/{pid_v3}/?lang={lang}", + f"{domain}/scielo.php?script=sci_arttext&pid={pid_v2}&format=pdf&lng={lang}&nrm=iso", + f"{domain}/j/{journal_acron}/a/{pid_v3}/?format=pdf&lang={lang}", ] + pattern = r"format=pdf" try: user = _get_user(self.request, user_id=user_id, username=username) article = Article.objects.get(pid_v3=pid_v3) @@ -64,19 +82,24 @@ def process_article_availability(self, user_id, username, pid_v3, journal_acron, for url in urls: try: response = fetch_data(url, timeout=2, verify=True) + except Exception as e : CheckArticleAvailability.create_or_update( article=article, - status=True, - url=url, - user=user, - ) - except Exception as e: - CheckArticleAvailability.create_or_update( - article=article, - status=False, + status=dict(VERIFY_HTTP_ERROR_CODE).get(type(e), _("An unknown error occurred")), + available=False, url=url, + type=re.search(pattern, url), user=user, ) + continue + CheckArticleAvailability.create_or_update( + article=article, + status="Site Available", + available=True, + url=url, + type=re.search(pattern, url), + user=user, + ) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() UnexpectedEvent.create( @@ -84,6 +107,7 @@ def process_article_availability(self, user_id, username, pid_v3, journal_acron, exc_traceback=exc_traceback, detail={ "function": "article.tasks.process_article_availability", - "urls": urls + "urls": urls, + "url": url, }, ) \ No newline at end of file From a99427b9bb9f1dbc9c2b10a6586385fb3a364c69 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 23 May 2024 12:30:35 -0300 Subject: [PATCH 18/19] Adiciona novos displays e filter em ScieloSiteStatus --- article/wagtail_hooks.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/article/wagtail_hooks.py b/article/wagtail_hooks.py index 1c2803b4..7fad0231 100644 --- a/article/wagtail_hooks.py +++ b/article/wagtail_hooks.py @@ -250,13 +250,18 @@ class ScieloSiteStatusAdmin(ModelAdmin): list_display = ( "article", "url_site_scielo", - "available", + "status", "check_date", + "available", + "type", ) search_fields= ( "url_site_scielo", "checkarticleavailability__article__pid_v3" ) + list_filter = ( + "type", + ) menu_order = 200 add_to_settings_menu = False exclude_from_explorer = False From 2706e9d010bcf0fcdff28911f0b6494a70f25f51 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 23 May 2024 12:30:40 -0300 Subject: [PATCH 19/19] migracao --- ..._scielositestatus_checkarticleavailability.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/article/migrations/0002_scielositestatus_checkarticleavailability.py b/article/migrations/0002_scielositestatus_checkarticleavailability.py index a0510976..20d3bfea 100644 --- a/article/migrations/0002_scielositestatus_checkarticleavailability.py +++ b/article/migrations/0002_scielositestatus_checkarticleavailability.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.3 on 2024-05-21 00:43 +# Generated by Django 5.0.3 on 2024-05-23 15:09 import django.db.models.deletion from django.conf import settings @@ -38,6 +38,16 @@ class Migration(migrations.Migration): ), ("check_date", models.DateTimeField(blank=True, null=True)), ("url_site_scielo", models.SlugField(max_length=500, unique=True)), + ("status", models.CharField(blank=True, max_length=80, null=True)), + ( + "type", + models.CharField( + blank=True, + choices=[("TEXT", "Texto"), ("PDF", "pdf")], + max_length=10, + null=True, + ), + ), ("available", models.BooleanField(default=False)), ( "creator", @@ -63,7 +73,8 @@ class Migration(migrations.Migration): ), ], options={ - "abstract": False, + "verbose_name": "Scielo Site Status", + "verbose_name_plural": "Scielo Site Status", }, ), migrations.CreateModel( @@ -96,6 +107,7 @@ class Migration(migrations.Migration): null=True, on_delete=django.db.models.deletion.SET_NULL, to="article.article", + unique=True, ), ), (