diff --git a/article/choices.py b/article/choices.py
index 0b074cd1..a514341c 100644
--- a/article/choices.py
+++ b/article/choices.py
@@ -1,4 +1,5 @@
 from django.utils.translation import gettext as _
+from core.utils.requester import NonRetryableError, RetryableError
 
 # Model RequestArticleChange, Field change_type
 RCT_UPDATE = "update"
@@ -124,3 +125,14 @@
     (AS_SCHEDULED_TO_PUBLISH, _("Scheduled to publish")),
     (AS_PUBLISHED, _("Published")),
 )
+
+VERIFY_ARTICLE_TYPE = [
+    ("TEXT", _("Text")),
+    ("PDF", _("pdf")),
+]
+
+# Maps a requester exception class to the status message stored for the URL
+VERIFY_HTTP_ERROR_CODE = [
+    (RetryableError, _("Excessively long response time. Retry later")),
+    (NonRetryableError, _("Site not found.")),
+]
diff --git a/article/migrations/0002_scielositestatus_checkarticleavailability.py b/article/migrations/0002_scielositestatus_checkarticleavailability.py
new file mode 100644
index 00000000..20d3bfea
--- /dev/null
+++ b/article/migrations/0002_scielositestatus_checkarticleavailability.py
@@ -0,0 +1,141 @@
+# Generated by Django 5.0.3 on 2024-05-23 15:09
+
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("article", "0001_initial"),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="ScieloSiteStatus",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "created",
+                    models.DateTimeField(
+                        auto_now_add=True, verbose_name="Creation date"
+                    ),
+                ),
+                (
+                    "updated",
+                    models.DateTimeField(
+                        auto_now=True, verbose_name="Last update date"
+                    ),
+                ),
+                ("check_date", models.DateTimeField(blank=True, null=True)),
+                ("url_site_scielo", models.URLField(max_length=500, unique=True)),
+                ("status", models.CharField(blank=True, max_length=80, null=True)),
+                (
+                    "type",
+                    models.CharField(
+                        blank=True,
+                        choices=[("TEXT", "Texto"), ("PDF", "pdf")],
+                        max_length=10,
+                        null=True,
+                    ),
+                ),
+                ("available", models.BooleanField(default=False)),
+                (
+                    "creator",
+                    models.ForeignKey(
+                        editable=False,
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="%(class)s_creator",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Creator",
+                    ),
+                ),
+                (
+                    "updated_by",
+                    models.ForeignKey(
+                        blank=True,
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="%(class)s_last_mod_user",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Updater",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Scielo Site Status",
+                "verbose_name_plural": "Scielo Site Status",
+            },
+        ),
+        migrations.CreateModel(
+            name="CheckArticleAvailability",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "created",
+                    models.DateTimeField(
+                        auto_now_add=True, verbose_name="Creation date"
+                    ),
+                ),
+                (
+                    "updated",
+                    models.DateTimeField(
+                        auto_now=True, verbose_name="Last update date"
+                    ),
+                ),
+                (
+                    "article",
+                    models.ForeignKey(
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        to="article.article",
+                        unique=True,
+                    ),
+                ),
+                (
+                    "creator",
+                    models.ForeignKey(
+                        editable=False,
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="%(class)s_creator",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Creator",
+                    ),
+                ),
+                (
+                    "updated_by",
+                    models.ForeignKey(
+                        blank=True,
+                        editable=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="%(class)s_last_mod_user",
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Updater",
+                    ),
+                ),
+                ("site_status", models.ManyToManyField(to="article.scielositestatus")),
+            ],
+            options={
+                "abstract": False,
+            },
+        ),
+    ]
diff --git a/article/models.py b/article/models.py
index 2c59a1f1..a0a8c286 100644
--- a/article/models.py
+++ b/article/models.py
@@ -1,7 +1,8 @@
 import logging
 
 from django.contrib.auth import get_user_model
-from django.db import models
+from django.db import models, IntegrityError
+from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
 from modelcluster.fields import ParentalKey
 from modelcluster.models import ClusterableModel
@@ -29,6 +30,17 @@
 User = get_user_model()
 
 
+def verify_type_of_url(type):
+    """
+    Return the VERIFY_ARTICLE_TYPE key for a checked URL.
+
+    A truthy `type` (e.g. the match object for "format=pdf") means PDF,
+    a falsy one means text. The key is returned, not its translated
+    label, so it is a valid value for the `ScieloSiteStatus.type` choices.
+    """
+    return "PDF" if type else "TEXT"
+
+
 class Article(ClusterableModel, CommonControlField):
     """
     No contexto de Upload, Article deve conter o mínimo de campos,
@@ -314,3 +326,203 @@ def __str__(self) -> str:
         return f"{self.article or self.pid_v3} - {self.deadline}"
 
     base_form_class = RequestArticleChangeForm
+
+
+class CheckArticleAvailability(CommonControlField):
+    """
+    Stores the availability status of an article on the scielo.br sites,
+    for both the new and the old website versions.
+    """
+    article = models.ForeignKey(
+        Article,
+        on_delete=models.SET_NULL,
+        null=True,
+        unique=True,
+    )
+    site_status = models.ManyToManyField(
+        "ScieloSiteStatus"
+    )
+
+    def __str__(self):
+        # article is nullable (on_delete=SET_NULL): do not crash without it
+        return f"{self.article.pid_v3}" if self.article else ""
+
+    @classmethod
+    def get(cls, article):
+        return cls.objects.get(article=article)
+
+    def create_or_update_scielo_site_status(
+        self,
+        url,
+        status,
+        type,
+        available,
+        user,
+        date=None,
+    ):
+        obj = ScieloSiteStatus.create_or_update(
+            url=url,
+            status=status,
+            type=type,
+            available=available,
+            date=date,
+            user=user,
+        )
+        self.site_status.add(obj)
+        self.save()
+
+
+    @classmethod
+    def create(
+        cls,
+        article,
+        status,
+        available,
+        url,
+        type,
+        user,
+        date=None,
+    ):
+        try:
+            obj = cls(
+                article=article,
+                creator=user,
+            )
+            obj.save()
+        except IntegrityError:
+            obj = cls.get(article=article)
+        obj.create_or_update_scielo_site_status(
+            url=url,
+            status=status,
+            type=type,
+            available=available,
+            user=user,
+            date=date,
+        )
+        return obj
+
+    @classmethod
+    def create_or_update(
+        cls,
+        article,
+        status,
+        available,
+        url,
+        type,
+        user,
+        date=None,
+    ):
+        try:
+            obj = cls.get(article=article)
+            obj.create_or_update_scielo_site_status(
+                url=url,
+                status=status,
+                type=type,
+                available=available,
+                date=date,
+                user=user,
+            )
+            return obj
+        except cls.DoesNotExist:
+            return cls.create(
+                article=article,
+                status=status,
+                available=available,
+                url=url,
+                type=type,
+                date=date,
+                user=user
+            )
+
+
+class ScieloSiteStatus(CommonControlField):
+    """Result of the latest availability check of one site URL."""
+    check_date = models.DateTimeField(null=True, blank=True)
+    # URLField: the stored URLs contain "/", "?" and "&", which SlugField validation rejects
+    url_site_scielo = models.URLField(max_length=500, unique=True)
+    status = models.CharField(
+        max_length=80,
+        null=True,
+        blank=True
+    )
+    type = models.CharField(
+        max_length=10,
+        choices=choices.VERIFY_ARTICLE_TYPE,
+        null=True,
+        blank=True,
+    )
+    available = models.BooleanField(default=False)
+
+    def update(
+        self,
+        status,
+        type,
+        available,
+        date=None,
+    ):
+        self.check_date = date or timezone.now()
+        self.status = status
+        self.available = available
+        self.type = verify_type_of_url(type)
+        self.save()
+        return self
+
+    class Meta:
+        verbose_name = "Scielo Site Status"
+        verbose_name_plural = "Scielo Site Status"
+
+    @classmethod
+    def get(cls, url):
+        return cls.objects.get(url_site_scielo=url)
+
+
+    @classmethod
+    def create(
+        cls,
+        url,
+        status,
+        type,
+        available,
+        user,
+        date=None,
+    ):
+        date = date or timezone.now()
+        obj = cls(
+            check_date=date,
+            url_site_scielo=url,
+            status=status,
+            type=verify_type_of_url(type),
+            available=available,
+            creator=user
+        )
+        obj.save()
+        return obj
+
+    @classmethod
+    def create_or_update(
+        cls,
+        url,
+        status,
+        type,
+        available,
+        user,
+        date=None,
+    ):
+        try:
+            obj = cls.get(url=url)
+            obj.update(
+                status=status,
+                type=type,
+                available=available,
+                date=date
+            )
+            return obj
+        except cls.DoesNotExist:
+            return cls.create(
+                url=url,
+                status=status,
+                type=type,
+                available=available,
+                user=user,
+                date=date
+            )
diff --git a/article/scripts/load_check_article.py
b/article/scripts/load_check_article.py
new file mode 100644
index 00000000..98a15ae3
--- /dev/null
+++ b/article/scripts/load_check_article.py
@@ -0,0 +1,12 @@
+from article.tasks import initiate_article_availability_check
+
+
+def run(pid_v3, username=None, user_id=None):
+    """Queue the availability check of the article identified by pid_v3."""
+    initiate_article_availability_check.apply_async(
+        kwargs=dict(
+            username=username,
+            user_id=user_id,
+            article_pid_v3=pid_v3,
+        )
+    )
diff --git a/article/tasks.py b/article/tasks.py
new file mode 100644
index 00000000..3d6bfbe0
--- /dev/null
+++ b/article/tasks.py
@@ -0,0 +1,134 @@
+import re
+import sys
+from django.db.models import Q
+from django.utils.translation import gettext_lazy as _
+
+from article.choices import VERIFY_HTTP_ERROR_CODE
+from config import celery_app
+from core.utils.get_user import _get_user
+from core.utils.requester import fetch_data, NonRetryableError, RetryableError
+from article.models import CheckArticleAvailability, Article
+from collection.models import Collection
+from tracker.models import UnexpectedEvent
+
+
+@celery_app.task(bind=True)
+def initiate_article_availability_check(
+    self,
+    username,
+    user_id,
+    issn_print=None,
+    issn_electronic=None,
+    publication_year=None,
+    updated=None,
+    article_pid_v3=None,
+    collection_acron=None,
+):
+    """
+    Queue one process_article_availability task per article and language.
+
+    Articles come from the collection `collection_acron` (every collection
+    when it is empty). Unless `updated` is truthy, the selection is narrowed
+    by each filter that is given: pid v3, ISSNs and publication year.
+    Failures are recorded as UnexpectedEvent instead of being raised.
+    """
+    if collection_acron:
+        collection = Collection.objects.filter(acron=collection_acron)
+    else:
+        collection = Collection.objects.all()
+
+    query = Q(journal__journalproc__collection__in=collection)
+    if not updated:
+        # AND the optional filters: OR-ing them with the collection query
+        # would always select every article of the collection
+        if article_pid_v3:
+            query &= Q(pid_v3=article_pid_v3)
+        if issn_print:
+            query &= Q(journal__official_journal__issn_print=issn_print)
+        if issn_electronic:
+            query &= Q(journal__official_journal__issn_electronic=issn_electronic)
+        if publication_year:
+            query &= Q(issue__publication_year=publication_year)
+
+    # distinct(): the journalproc join may repeat an article
+    articles = Article.objects.filter(query).distinct()
+
+    try:
+        for article in articles.iterator():
+            for article_per_lang in article.doi_with_lang.lang:
+                process_article_availability.apply_async(
+                    kwargs=dict(
+                        user_id=user_id,
+                        username=username,
+                        pid_v3=article.pid_v3,
+                        pid_v2=article.sps_pkg.articleproc_set.first().pid,
+                        journal_acron=article.journal.journal_acron,
+                        lang=article_per_lang,
+                        domain=article.journal.journalproc_set.first().collection.websiteconfiguration_set.get(enabled=True).url,
+                    )
+                )
+    except Exception as e:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            e=e,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "article.tasks.initiate_article_availability_check",
+            },
+        )
+
+
+@celery_app.task(bind=True)
+def process_article_availability(self, user_id, username, pid_v3, pid_v2, journal_acron, lang, domain,):
+    """
+    Request the article URLs for one language and store each outcome.
+
+    Four URLs are built from `domain`: text and PDF, each in the
+    `scielo.php?...pid=<pid_v2>` and `/j/<acron>/a/<pid_v3>/` forms. Every
+    result is saved with CheckArticleAvailability.create_or_update.
+    """
+    urls = [
+        f"{domain}/scielo.php?script=sci_arttext&pid={pid_v2}&lang={lang}&nrm=iso",
+        f"{domain}/j/{journal_acron}/a/{pid_v3}/?lang={lang}",
+        f"{domain}/scielo.php?script=sci_arttext&pid={pid_v2}&format=pdf&lng={lang}&nrm=iso",
+        f"{domain}/j/{journal_acron}/a/{pid_v3}/?format=pdf&lang={lang}",
+    ]
+    pattern = r"format=pdf"
+    # bound before `try` because the outer handler reports it
+    url = None
+    try:
+        user = _get_user(self.request, user_id=user_id, username=username)
+        article = Article.objects.get(pid_v3=pid_v3)
+
+        for url in urls:
+            try:
+                fetch_data(url, timeout=2, verify=True)
+            except Exception as e:
+                CheckArticleAvailability.create_or_update(
+                    article=article,
+                    status=dict(VERIFY_HTTP_ERROR_CODE).get(type(e), _("An unknown error occurred")),
+                    available=False,
+                    url=url,
+                    type=re.search(pattern, url),
+                    user=user,
+                )
+                continue
+            CheckArticleAvailability.create_or_update(
+                article=article,
+                status="Site Available",
+                available=True,
+                url=url,
+                type=re.search(pattern, url),
+                user=user,
+            )
+    except Exception as e:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        UnexpectedEvent.create(
+            e=e,
+            exc_traceback=exc_traceback,
+            detail={
+                "function": "article.tasks.process_article_availability",
+                "urls": urls,
+                "url": url,
+            },
+        )
diff --git
a/article/wagtail_hooks.py b/article/wagtail_hooks.py
index 4e28b5fa..7fad0231 100644
--- a/article/wagtail_hooks.py
+++ b/article/wagtail_hooks.py
@@ -14,7 +14,7 @@
 from config.menu import get_menu_order
 
 from .button_helper import ArticleButtonHelper, RequestArticleChangeButtonHelper
-from .models import Article, RelatedItem, RequestArticleChange, choices
+from .models import Article, RelatedItem, RequestArticleChange, choices, ScieloSiteStatus
 from .permission_helper import ArticlePermissionHelper
 
 # from upload import exceptions as upload_exceptions
@@ -243,8 +243,38 @@ class ArticleModelAdminGroup(ModelAdminGroup):
         # RequestArticleChangeModelAdmin,
     )
 
+class ScieloSiteStatusAdmin(ModelAdmin):
+    model = ScieloSiteStatus
+    menu_label = "Scielo Site Status"
+    menu_icon = "doc-full"
+    list_display = (
+        "article",
+        "url_site_scielo",
+        "status",
+        "check_date",
+        "available",
+        "type",
+    )
+    search_fields = (
+        "url_site_scielo",
+        "checkarticleavailability__article__pid_v3"
+    )
+    list_filter = (
+        "type",
+    )
+    menu_order = 200
+    add_to_settings_menu = False
+    exclude_from_explorer = False
+
+    def article(self, obj):
+        return ", ".join(str(item) for item in obj.checkarticleavailability_set.all())
+
+
+    def get_queryset(self, request):
+        return super().get_queryset(request).filter(available=False)
+
 
-# modeladmin_register(ArticleModelAdminGroup)
+modeladmin_register(ScieloSiteStatusAdmin)
 modeladmin_register(ArticleModelAdmin)
 
 
diff --git a/bigbang/tasks.py b/bigbang/tasks.py
index a45a2de4..3ed0d6ee 100644
--- a/bigbang/tasks.py
+++ b/bigbang/tasks.py
@@ -1,19 +1,11 @@
 import logging
 
-from django.contrib.auth import get_user_model
+from core.utils.get_user import _get_user
 
 from bigbang import tasks_scheduler
 from bigbang.setup import setup
 from config import celery_app
 
-User = get_user_model()
-
-
-def _get_user(user_id, username):
-    if user_id:
-        return User.objects.get(pk=user_id)
-    if username:
-        return User.objects.get(username=username)
 
 
 @celery_app.task(bind=True)
@@ -36,7 +28,7 @@ def task_setup(
     file_path=None,
     config=None,
 ):
-    user = _get_user(user_id, username)
+    user = _get_user(self.request, user_id=user_id, username=username)
     if file_path or config:
         setup(user, file_path, config)
 
diff --git a/core/utils/get_user.py b/core/utils/get_user.py
new file mode 100644
index 00000000..8b101694
--- /dev/null
+++ b/core/utils/get_user.py
@@ -0,0 +1,25 @@
+from django.contrib.auth import get_user_model
+
+
+User = get_user_model()
+
+
+def _get_user(request, username=None, user_id=None):
+    """
+    Return the User of `request`, or the one given by `user_id`/`username`.
+
+    `request.user.id` is tried first. If that raises AttributeError, the
+    lookup falls back to `user_id` and then to `username`; None is returned
+    when both are empty.
+
+    Pass `user_id` and `username` by keyword: positionally, `username`
+    comes first.
+    """
+    try:
+        return User.objects.get(pk=request.user.id)
+    except AttributeError:
+        if user_id:
+            return User.objects.get(pk=user_id)
+        if username:
+            return User.objects.get(username=username)
+        return None
diff --git a/migration/tasks.py b/migration/tasks.py
index e044a265..04bc97fe 100644
--- a/migration/tasks.py
+++ b/migration/tasks.py
@@ -1,10 +1,10 @@
 import logging
 import sys
 
-from django.contrib.auth import get_user_model
 from django.utils.translation import gettext_lazy as _
 
 from collection.models import Collection
+from core.utils.get_user import _get_user
 from config import celery_app
 from htmlxml.models import HTMLXML
 from proc.models import ArticleProc, IssueProc, JournalProc
@@ -12,27 +12,6 @@
 
 from .
import controller
 
-User = get_user_model()
-
-
-def _get_user(user_id, username):
-    try:
-        if user_id:
-            return User.objects.get(pk=user_id)
-        if username:
-            return User.objects.get(username=username)
-    except Exception as e:
-        exc_type, exc_value, exc_traceback = sys.exc_info()
-        UnexpectedEvent.create(
-            e=e,
-            exc_traceback=exc_traceback,
-            detail={
-                "task": "migration.tasks._get_user",
-                "user_id": user_id,
-                "username": username,
-            },
-        )
-
 
 def _get_collections(collection_acron):
     try:
@@ -126,7 +105,7 @@ def task_migrate_title_record(
     Cria um registro MigratedData (source="journal")
     """
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         collection = Collection.get(acron=collection_acron)
         JournalProc.register_classic_website_data(
             user,
@@ -227,7 +206,7 @@ def task_migrate_issue_record(
     Cria um registro MigratedData (source="issue")
     """
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         collection = Collection.get(acron=collection_acron)
 
         IssueProc.register_classic_website_data(
@@ -305,7 +284,7 @@ def task_import_one_issue_files(
     force_update=False,
 ):
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         item = IssueProc.objects.get(pk=item_id)
         item.get_files_from_classic_website(
             user, force_update, controller.import_one_issue_files
@@ -379,7 +358,7 @@ def task_import_one_issue_document_records(
     Cria ou atualiza os registros de ArticleProc
     """
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         item = IssueProc.objects.get(pk=item_id)
         item.get_article_records_from_classic_website(
             user, force_update, controller.get_article_records_from_classic_website
@@ -446,7 +425,7 @@ def task_get_xml(
     body_and_back_xml=None,
 ):
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         item = ArticleProc.objects.get(pk=item_id)
 
         try:
diff --git a/pid_provider/tasks.py b/pid_provider/tasks.py
index ed320246..56e5093f 100644
--- a/pid_provider/tasks.py
+++ b/pid_provider/tasks.py
@@ -1,25 +1,13 @@
 import logging
 
-from django.contrib.auth import get_user_model
 
 from config import celery_app
+from core.utils.get_user import _get_user
 from pid_provider.provider import PidProvider
 from pid_provider.requester import PidRequester
 from proc.models import ArticleProc
 
 
-User = get_user_model()
-
-
-def _get_user(request, username=None, user_id=None):
-    try:
-        return User.objects.get(pk=request.user.id)
-    except AttributeError:
-        if user_id:
-            return User.objects.get(pk=user_id)
-        if username:
-            return User.objects.get(username=username)
-
 
 @celery_app.task(bind=True, name="provide_pid_for_file")
 def provide_pid_for_file(
diff --git a/proc/tasks.py b/proc/tasks.py
index 034fb6fb..ecee3ce8 100644
--- a/proc/tasks.py
+++ b/proc/tasks.py
@@ -5,33 +5,13 @@
 from django.utils.translation import gettext_lazy as _
 
 from collection.models import Collection
+from core.utils.get_user import _get_user
 from config import celery_app
 from proc.models import ArticleProc, IssueProc, JournalProc
 from tracker.models import UnexpectedEvent
 
 from . import controller
 
-User = get_user_model()
-
-
-def _get_user(user_id, username):
-    try:
-        if user_id:
-            return User.objects.get(pk=user_id)
-        if username:
-            return User.objects.get(username=username)
-    except Exception as e:
-        exc_type, exc_value, exc_traceback = sys.exc_info()
-        UnexpectedEvent.create(
-            e=e,
-            exc_traceback=exc_traceback,
-            detail={
-                "task": "proc.tasks._get_user",
-                "user_id": user_id,
-                "username": username,
-            },
-        )
-
 
 def _get_collections(collection_acron):
     try:
@@ -133,7 +113,7 @@ def task_create_or_update_journal(
     """
 
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         item = JournalProc.objects.get(pk=item_id)
         item.create_or_update_item(
             user, force_update, controller.create_or_update_journal
@@ -230,7 +210,7 @@ def task_create_or_update_issue(
     """
 
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         item = IssueProc.objects.get(pk=item_id)
         item.create_or_update_item(
             user, force_update, controller.create_or_update_issue
@@ -315,7 +295,7 @@ def task_generate_sps_package(
     force_core_update=None,
 ):
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         item = ArticleProc.objects.get(pk=item_id)
         if force_core_update and item.sps_pkg:
             item.sps_pkg.set_registered_in_core(False)
@@ -416,7 +396,7 @@ def task_create_or_update_article(
     """
 
     try:
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         item = ArticleProc.objects.get(pk=item_id)
         item.create_or_update_item(
             user, force_update, controller.create_or_update_article
@@ -474,6 +454,6 @@ def subtask_synchronize_to_pid_provider(
     user_id=None,
     item_id=None
 ):
-    user = _get_user(user_id, username)
+    user = _get_user(self.request, user_id=user_id, username=username)
     item = ArticleProc.objects.get(pk=item_id)
     item.synchronize(user)
diff --git a/publication/tasks.py b/publication/tasks.py
index cfab76fc..f7c6dcce 100644
--- a/publication/tasks.py
+++ b/publication/tasks.py
@@ -1,7 +1,6 @@
 import logging
 import sys
 
-from django.contrib.auth import get_user_model
 from django.utils.translation import gettext_lazy as _
 
 from collection.choices import QA
@@ -9,6 +8,7 @@
 from config import celery_app
 from proc.models import ArticleProc, IssueProc, JournalProc
 from core.models import PressRelease
+from core.utils.get_user import _get_user
 from publication.api.document import publish_article
 from publication.api.issue import publish_issue
 from publication.api.journal import publish_journal
@@ -16,7 +16,6 @@
 from publication.api.publication import PublicationAPI
 from tracker.models import UnexpectedEvent
 
-User = get_user_model()
 
 SCIELO_MODELS = {
     "journal": JournalProc,
@@ -33,25 +32,6 @@
 }
 
 
-def _get_user(user_id, username):
-    try:
-        if user_id:
-            return User.objects.get(pk=user_id)
-        if username:
-            return User.objects.get(username=username)
-    except Exception as e:
-        exc_type, exc_value, exc_traceback = sys.exc_info()
-        UnexpectedEvent.create(
-            e=e,
-            exc_traceback=exc_traceback,
-            detail={
-                "task": "migration.tasks._get_user",
-                "user_id": user_id,
-                "username": username,
-            },
-        )
-
-
 def _get_collections(collection_acron):
     try:
         if collection_acron:
@@ -220,7 +200,7 @@ def task_publish_model(
     website_kind = website_kind or QA
     model_name = model_name or "article"
     collection = Collection.get(acron=collection_acron)
-    user = _get_user(user_id, username)
+    user = _get_user(self.request, user_id=user_id, username=username)
 
     SciELOModel = SCIELO_MODELS.get(model_name)
 
@@ -253,7 +233,7 @@ def task_publish_item(
 ):
     try:
         item = None
-        user = _get_user(user_id, username)
+        user = _get_user(self.request, user_id=user_id, username=username)
         SciELOModel = SCIELO_MODELS.get(model_name)
         item = SciELOModel.objects.get(pk=item_id)
         item.publish(user, PUBLISH_FUNCTIONS.get(model_name), website_kind, api_data)
@@ -298,7 +278,6 @@ def task_publish_model_inline(
 ):
     website_kind = website_kind or QA
     collection = Collection.get(acron=collection_acron)
-    user = _get_user(user_id, username)
 
     website = WebSiteConfiguration.get(
         collection=collection,
diff --git a/upload/tasks.py b/upload/tasks.py
index 50797046..087a38f2 100644
--- a/upload/tasks.py
+++ b/upload/tasks.py
@@ -1,7 +1,6 @@
 import json
 
 from celery.result import AsyncResult
-from django.contrib.auth import get_user_model
 from django.utils.translation import gettext as _
 from packtools.sps import exceptions as sps_exceptions
 from packtools.sps.models import package as sps_package
@@ -13,6 +12,7 @@
 from article.choices import AS_CHANGE_SUBMITTED
 from article.controller import create_article_from_etree, update_article
 from article.models import Article
+from core.utils.get_user import _get_user
 from config import celery_app
 from issue.models import Issue
 from journal.controller import get_journal_dict_for_validation
@@ -23,9 +23,6 @@
 from upload.models import Package
 
 
-User = get_user_model()
-
-
 def run_validations(
     filename, package_id, package_category, article_id=None, issue_id=None
 ):
@@ -528,13 +525,6 @@ def task_get_or_create_package(pid_v3, user_id):
     ).id
 
 
-def _get_user(request, user_id):
-    try:
-        return User.objects.get(pk=request.user.id)
-    except AttributeError:
-        return User.objects.get(pk=user_id)
-
-
 @celery_app.task(bind=True, name="request_pid_for_accepted_packages")
 def task_request_pid_for_accepted_packages(self, user_id):
-    user = _get_user(self.request, user_id)
+    user = _get_user(self.request, user_id=user_id)