From d39bd79a1f56e76d1d5cd59b51a52eed3057695b Mon Sep 17 00:00:00 2001
From: tomolopolis
Date: Tue, 26 Sep 2023 13:06:34 +0100
Subject: [PATCH] CU-8692nnx2z: resubmit all validated docs on startup, default is true.

---
Review notes (this section is ignored by git am):
- apps.py: RESUBMIT_ALL_ON_STARTUP is parsed as a boolean, so '0'/'false' no
  longer enable the resubmission; a database that is not migrated yet is
  tolerated; failures are logged with their traceback.
- views.py: _submit_document raises the unigram-table error instead of
  returning it; submit_document returns the 500 response and uses str(e)
  because Python 3 exceptions have no .message attribute.
- envs/env-prod: the file now ends with a newline.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; confirm the context-line indentation (views.py in
  particular) against the target tree before applying.

 envs/env                   |  5 ++++-
 envs/env-prod              |  3 +++
 webapp/api/api/__init__.py |  2 ++
 webapp/api/api/apps.py     | 43 +++++++++++++++++++++++++++++++++++++++++--
 webapp/api/api/views.py    | 44 ++++++++++++++++++++++++++++----------------
 webapp/scripts/run.sh      |  2 ++
 6 files changed, 80 insertions(+), 19 deletions(-)

diff --git a/envs/env b/envs/env
index 96d36988..93a2a7a0 100644
--- a/envs/env
+++ b/envs/env
@@ -24,4 +24,7 @@ CONCEPT_SEARCH_SERVICE_PORT=8983
 ### DB backup dir ###
 DB_DIR=/home/api/db
 DB_PATH=${DB_DIR}/db.sqlite3 # currently only supports sqlite3 dbs
-DB_BACKUP_DIR=/home/api/db-backup
\ No newline at end of file
+DB_BACKUP_DIR=/home/api/db-backup
+
+# Resubmit all on startup
+RESUBMIT_ALL_ON_STARTUP=1
diff --git a/envs/env-prod b/envs/env-prod
index a267ad28..1300a40c 100644
--- a/envs/env-prod
+++ b/envs/env-prod
@@ -26,3 +26,6 @@ CONCEPT_SEARCH_SERVICE_PORT=8983
 DB_DIR=/home/api/db
 DB_PATH=${DB_DIR}/db.sqlite3 # currently only supports sqlite3 dbs
 DB_BACKUP_DIR= # should be set to 'good' backup location
+
+# Resubmit all on startup
+RESUBMIT_ALL_ON_STARTUP=1
diff --git a/webapp/api/api/__init__.py b/webapp/api/api/__init__.py
index e69de29b..f9e67cbb 100644
--- a/webapp/api/api/__init__.py
+++ b/webapp/api/api/__init__.py
@@ -0,0 +1,2 @@
+
+default_app_config = 'api.apps.ApiConfig'
diff --git a/webapp/api/api/apps.py b/webapp/api/api/apps.py
index 6d6b3c85..d0209f65 100644
--- a/webapp/api/api/apps.py
+++ b/webapp/api/api/apps.py
@@ -1,5 +1,44 @@
+import logging
+import os
+
 from django.apps import AppConfig
+from django.db.utils import OperationalError, ProgrammingError
 
+logger = logging.getLogger(__name__)
+
+# Values of RESUBMIT_ALL_ON_STARTUP (case-insensitive) that enable the resubmission.
+_TRUTHY_VALUES = ('1', 'true', 'yes', 'y', 'on')
+
+
+class ApiConfig(AppConfig):
+    name = 'api'
 
-class AnnoappConfig(AppConfig):
-    name = 'annoapp'
+    def ready(self):
+        """Optionally resubmit every validated document once the app is loaded.
+
+        Controlled by the RESUBMIT_ALL_ON_STARTUP env var; when it is unset or
+        holds a value outside _TRUTHY_VALUES (e.g. '0', 'false') nothing is resubmitted.
+        """
+        # Kept as function-level imports, as in the first version of this change.
+        from api.views import _submit_document
+        from api.models import ProjectAnnotateEntities
+
+        flag = os.environ.get('RESUBMIT_ALL_ON_STARTUP', '')
+        if flag.strip().lower() in _TRUTHY_VALUES:
+            logger.info('Found env var RESUBMIT_ALL_ON_STARTUP is True. '
+                        'Attempting to resubmit all currently submitted state documents')
+            try:
+                for project in ProjectAnnotateEntities.objects.all():
+                    for doc in project.validated_documents.all():
+                        try:
+                            _submit_document(project, doc)
+                            logger.info("Submitted doc: %s", doc.name)
+                        except Exception as e:
+                            # Best effort: one failing document must not stop the rest.
+                            logger.exception("Failed to re-submit doc on startup with exception %s", e)
+                    logger.info("Finished resubmitting Project %s", project.name)
+            except (OperationalError, ProgrammingError) as e:
+                # ready() also runs for management commands such as migrate, when
+                # the tables may not exist yet; skip rather than abort startup.
+                logger.warning("Skipping resubmit on startup, database not ready: %s", e)
+        logger.info("MedCATTrainer App API ready...")
diff --git a/webapp/api/api/views.py b/webapp/api/api/views.py
index 671986ef..162cf4eb 100644
--- a/webapp/api/api/views.py
+++ b/webapp/api/api/views.py
@@ -382,29 +382,24 @@ def import_cdb_concepts(request):
     return Response({'message': 'submitted cdb import job.'})
 
 
-@api_view(http_method_names=['POST'])
-def submit_document(request):
-    # Get project id
-    p_id = request.data['project_id']
-    d_id = request.data['document_id']
-
-    # Get project and the right version of cat
-    project = ProjectAnnotateEntities.objects.get(id=p_id)
-    document = Document.objects.get(id=d_id)
-
-    cat = get_medcat(CDB_MAP=CDB_MAP, VOCAB_MAP=VOCAB_MAP,
-                     CAT_MAP=CAT_MAP, project=project)
-
+def _submit_document(project: ProjectAnnotateEntities, document: Document):
+    """Run the submit steps for one document: optional training, then the CUI filter update.
+
+    Failures are raised rather than returned as HTTP responses, so the function
+    can be called from the submit_document view and from ApiConfig.ready().
+    """
     if project.train_model_on_submit:
         try:
+            cat = get_medcat(CDB_MAP=CDB_MAP, VOCAB_MAP=VOCAB_MAP,
+                             CAT_MAP=CAT_MAP, project=project)
             train_medcat(cat, project, document)
         except Exception as e:
             if project.vocab.id:
                 if len(VOCAB_MAP[project.vocab.id].unigram_table) == 0:
-                    return HttpResponseServerError('Vocab is missing the unigram table. On the vocab instance '
-                                                   'use vocab.make_unigram_table() to build')
+                    raise Exception('Vocab is missing the unigram table. On the vocab instance '
+                                    'use vocab.make_unigram_table() to build') from e
             else:
-                return HttpResponseServerError(e.message)
+                raise
 
     # Add cuis to filter if they did not exist
     cuis = []
@@ -423,6 +418,23 @@ def submit_document(request):
             project.cuis += ',' + ','.join(extra_doc_cuis)
             project.save()
 
+
+@api_view(http_method_names=['POST'])
+def submit_document(request):
+    # Get project id
+    p_id = request.data['project_id']
+    d_id = request.data['document_id']
+
+    # Get project and the right version of cat
+    project = ProjectAnnotateEntities.objects.get(id=p_id)
+    document = Document.objects.get(id=d_id)
+
+    try:
+        _submit_document(project, document)
+    except Exception as e:
+        # str(e): Python 3 exceptions have no .message attribute.
+        return HttpResponseServerError(str(e))
+
     return Response({'message': 'Document submited successfully'})
 
 
diff --git a/webapp/scripts/run.sh b/webapp/scripts/run.sh
index d4cea627..dfdb1758 100755
--- a/webapp/scripts/run.sh
+++ b/webapp/scripts/run.sh
@@ -24,3 +24,5 @@ if [ $LOAD_EXAMPLES ]; then
 fi
 
 uwsgi --http-timeout 360s --http :8000 --master --chdir /home/api/ --module core.wsgi
+
+