Skip to content

Commit

Permalink
Merge pull request #200 from CogStack/meta-cat-model-preds
Browse files Browse the repository at this point in the history
CU-2e77aae MetaCAT model Predictions
  • Loading branch information
tomolopolis authored Sep 4, 2024
2 parents f3b593b + 3c7fc5a commit 757a903
Show file tree
Hide file tree
Showing 18 changed files with 607 additions and 186 deletions.
2 changes: 1 addition & 1 deletion webapp/api/api/admin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
admin.site.register(Entity)
admin.site.register(MetaTaskValue)
admin.site.register(MetaTask)
admin.site.register(MetaAnnotation)
admin.site.register(Vocabulary)
admin.site.register(Relation)
admin.site.register(EntityRelation)
admin.site.register(ProjectGroup, ProjectGroupAdmin)
admin.site.register(ProjectAnnotateEntities, ProjectAnnotateEntitiesAdmin)
admin.site.register(AnnotatedEntity, AnnotatedEntityAdmin)
admin.site.register(MetaAnnotation, MetaAnnotationAdmin)
admin.site.register(ConceptDB, ConceptDBAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(ExportedProject, ExportedProjectAdmin)
Expand Down
4 changes: 3 additions & 1 deletion webapp/api/api/admin/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ def reset_project(modeladmin, request, queryset):
# Remove all annotations and cascade to meta anns
AnnotatedEntity.objects.filter(project=project).delete()

# Set all validated documents to none
# Clear validated_docuents and prepared_documents
project.validated_documents.clear()
project.prepared_documents.clear()



def download_without_text(modeladmin, request, queryset):
Expand Down
15 changes: 11 additions & 4 deletions webapp/api/api/admin/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ..models import *

_PROJECT_ANNO_ENTS_SETTINGS_FIELD_ORDER = (
'concept_db', 'vocab', 'cdb_search_filter', 'require_entity_validation', 'train_model_on_submit',
'concept_db', 'vocab', 'model_pack', 'cdb_search_filter', 'require_entity_validation', 'train_model_on_submit',
'add_new_entities', 'restrict_concept_lookup', 'terminate_available', 'irrelevant_available',
'enable_entity_annotation_comments', 'tasks', 'relations'
)
Expand Down Expand Up @@ -43,7 +43,8 @@ class ProjectAnnotateEntitiesAdmin(admin.ModelAdmin):
actions = [download, download_without_text, download_without_text_with_doc_names, reset_project, clone_projects]
list_filter = ('members', 'project_status', 'project_locked', 'annotation_classification')
list_display = ['name']
fields = (('group', 'name', 'description', 'annotation_guideline_link', 'members', 'dataset', 'validated_documents') +
fields = (('group', 'name', 'description', 'annotation_guideline_link', 'members',
'dataset', 'validated_documents', 'prepared_documents') +
_PROJECT_FIELDS_ORDER +
_PROJECT_ANNO_ENTS_SETTINGS_FIELD_ORDER)

Expand All @@ -55,7 +56,7 @@ def formfield_for_foreignkey(self, db_field, request, **kwargs):
def formfield_for_manytomany(self, db_field, request, **kwargs):
if db_field.name == 'cdb_search_filter':
kwargs['queryset'] = ConceptDB.objects.all()
if db_field.name == 'validated_documents':
if db_field.name in ('validated_documents', 'prepared_documents'):
project_id = request.path.replace('/admin/api/projectannotateentities/', '').split('/')[0]
try:
proj = ProjectAnnotateEntities.objects.get(id=int(project_id))
Expand Down Expand Up @@ -165,7 +166,13 @@ def metacats(self, obj):
class MetaCATModelAdmin(admin.ModelAdmin):
model = MetaCATModel
list_display = ('name', 'meta_cat_dir')
list_filter = ['meta_task']


class MetaAnnotationAdmin(admin.ModelAdmin):
model = MetaAnnotation
list_display = ('annotated_entity', 'meta_task', 'meta_task_value', 'acc',
'predicted_meta_task_value', 'validated', 'last_modified')
list_filter = ('meta_task', 'meta_task_value', 'predicted_meta_task_value', 'validated')


class DocumentAdmin(admin.ModelAdmin):
Expand Down
16 changes: 11 additions & 5 deletions webapp/api/api/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@
def calculate_metrics(project_ids: List[int], report_name: str):
"""
Computes metrics in a background task
:param projects: list of projects to compute metrics for. Uses the 'first' for the CDB, but
should be the same CDB, but will still try and compute metrics regardless
:param projects: list of projects to compute metrics for.
Uses the 'first' for the CDB / vocab or ModelPack,
but should be the same CDB, but will still try and compute metrics regardless.
:return: computed metrics results
"""
logger.info('Calculating metrics for report: %s', report_name)
projects = [ProjectAnnotateEntities.objects.filter(id=p_id).first() for p_id in project_ids]
cdb = CDB.load(projects[0].concept_db.cdb_file.path)
vocab = Vocab.load(projects[0].vocab.vocab_file.path)
cat = CAT(cdb, vocab, config=cdb.config)
if projects[0].cdb is None:
# assume the model pack is set.
cat = CAT.load_model_pack(projects[0].model_pack.model_pack.path)
else:
# assume the cdb / vocab is set in these projects
cdb = CDB.load(projects[0].concept_db.cdb_file.path)
vocab = Vocab.load(projects[0].vocab.vocab_file.path)
cat = CAT(cdb, vocab, config=cdb.config)
project_data = retrieve_project_data(projects)
metrics = ProjectMetrics(project_data, cat)
report = metrics.generate_report()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Generated by Django 5.0.6 on 2024-08-28 10:56

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('api', '0081_alter_metatask_name'),
]

operations = [
migrations.RemoveField(
model_name='metacatmodel',
name='meta_task',
),
migrations.AddField(
model_name='metaannotation',
name='predicted_meta_task_value',
field=models.ForeignKey(blank=True, help_text='meta annotation predicted by a MetaAnnotationModel', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='predicted_value', to='api.metataskvalue'),
),
migrations.AddField(
model_name='metatask',
name='prediction_model',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='api.metacatmodel'),
),
migrations.AddField(
model_name='project',
name='meta_cat_predictions',
field=models.BooleanField(default=False, help_text='If MetaTasks are setup on the project and there are associated MetaCATModel instances, display these predictions in the interface to be validated / corrected'),
),
migrations.AddField(
model_name='projectannotateentities',
name='model_pack',
field=models.ForeignKey(blank=True, default=None, help_text='A MedCAT model pack. This will raise an exception if both the CDB and Vocab and ModelPack fields are set', null=True, on_delete=django.db.models.deletion.SET_NULL, to='api.modelpack'),
),
migrations.AddField(
model_name='projectgroup',
name='meta_cat_predictions',
field=models.BooleanField(default=False, help_text='If MetaTasks are setup on the project and there are associated MetaCATModel instances, display these predictions in the interface to be validated / corrected'),
),
migrations.AddField(
model_name='projectgroup',
name='model_pack',
field=models.ForeignKey(blank=True, default=None, help_text='A MedCAT model pack. This will raise an exception if both the CDB and Vocab and ModelPack fields are set', null=True, on_delete=django.db.models.deletion.SET_NULL, to='api.modelpack'),
),
migrations.AlterField(
model_name='metaannotation',
name='validated',
field=models.BooleanField(default=False, help_text='If an annotation is not '),
),
migrations.AlterField(
model_name='metacatmodel',
name='meta_cat_dir',
field=models.FilePathField(allow_folders=True, editable=False, help_text='The zip or dir for a MetaCAT model, not editable, is set via a model pack .zip upload'),
),
migrations.AlterField(
model_name='metacatmodel',
name='name',
field=models.CharField(help_text='The task name followed by the underlying model impl', max_length=100),
),
migrations.AlterField(
model_name='projectannotateentities',
name='concept_db',
field=models.ForeignKey(blank=True, help_text='The MedCAT CDB used to annotate / validate', null=True, on_delete=django.db.models.deletion.SET_NULL, to='api.conceptdb'),
),
migrations.AlterField(
model_name='projectannotateentities',
name='tasks',
field=models.ManyToManyField(blank=True, default=None, help_text='The set of MetaAnnotation tasks configured for this project, this will default to the set of Tasks configured in a ModelPack if a model pack is used for the project', to='api.metatask'),
),
migrations.AlterField(
model_name='projectannotateentities',
name='vocab',
field=models.ForeignKey(blank=True, help_text='The MedCAT Vocab used to annotate / validate', null=True, on_delete=django.db.models.deletion.SET_NULL, to='api.vocabulary'),
),
migrations.AlterField(
model_name='projectgroup',
name='cdb_search_filter',
field=models.ManyToManyField(blank=True, help_text='The CDB that will be used for concept lookup. This specific CDB should have been "imported" via the CDB admin screen', related_name='project_group_concept_source', to='api.conceptdb'),
),
migrations.AlterField(
model_name='projectgroup',
name='concept_db',
field=models.ForeignKey(blank=True, help_text='The MedCAT CDB used to annotate / validate', null=True, on_delete=django.db.models.deletion.SET_NULL, to='api.conceptdb'),
),
migrations.AlterField(
model_name='projectgroup',
name='tasks',
field=models.ManyToManyField(blank=True, default=None, help_text='The set of MetaAnnotation tasks configured for this project, this will default to the set of Tasks configured in a ModelPack if a model pack is used for the project', to='api.metatask'),
),
migrations.AlterField(
model_name='projectgroup',
name='vocab',
field=models.ForeignKey(blank=True, help_text='The MedCAT Vocab used to annotate / validate', null=True, on_delete=django.db.models.deletion.SET_NULL, to='api.vocabulary'),
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 5.0.6 on 2024-08-29 11:25

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('api', '0082_remove_metacatmodel_meta_task_and_more'),
]

operations = [
migrations.AddField(
model_name='project',
name='prepared_documents',
field=models.ManyToManyField(blank=True, default=None, help_text='Set automatically on each prep of a document', related_name='prepared_documents', to='api.document'),
),
migrations.AlterField(
model_name='project',
name='validated_documents',
field=models.ManyToManyField(blank=True, default=None, help_text='Set automatically on each doc submission', to='api.document'),
),
]
127 changes: 127 additions & 0 deletions webapp/api/api/model_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import logging
import os
from typing import Dict

import pkg_resources
from medcat.cat import CAT
from medcat.cdb import CDB
from medcat.vocab import Vocab

from api.models import ConceptDB

"""
Module level caches for CDBs, Vocabs and CAT instances.
"""
# Maps between IDs and objects
CDB_MAP = {}
VOCAB_MAP = {}
CAT_MAP = {}


logger = logging.getLogger(__name__)


def get_medcat_from_cdb_vocab(project,
cdb_map: Dict[str, CDB]=CDB_MAP,
vocab_map: Dict[str, Vocab]=VOCAB_MAP,
cat_map: Dict[str, CAT]=CAT_MAP) -> CAT:
cdb_id = project.concept_db.id
vocab_id = project.vocab.id
cat_id = str(cdb_id) + "-" + str(vocab_id)
if cat_id in cat_map:
cat = cat_map[cat_id]
else:
if cdb_id in cdb_map:
cdb = cdb_map[cdb_id]
else:
cdb_path = project.concept_db.cdb_file.path
try:
cdb = CDB.load(cdb_path)
except KeyError as ke:
mc_v = pkg_resources.get_distribution('medcat').version
if int(mc_v.split('.')[0]) > 0:
logger.error('Attempted to load MedCAT v0.x model with MCTrainer v1.x')
raise Exception('Attempted to load MedCAT v0.x model with MCTrainer v1.x',
'Please re-configure this project to use a MedCAT v1.x CDB or consult the '
'MedCATTrainer Dev team if you believe this should work') from ke
raise

custom_config = os.getenv("MEDCAT_CONFIG_FILE")
if custom_config is not None and os.path.exists(custom_config):
cdb.config.parse_config_file(path=custom_config)
else:
logger.info("No MEDCAT_CONFIG_FILE env var set to valid path, using default config available on CDB")
cdb_map[cdb_id] = cdb

if vocab_id in vocab_map:
vocab = vocab_map[vocab_id]
else:
vocab_path = project.vocab.vocab_file.path
vocab = Vocab.load(vocab_path)
vocab_map[vocab_id] = vocab
cat = CAT(cdb=cdb, config=cdb.config, vocab=vocab)
cat_map[cat_id] = cat
return cat


def get_medcat_from_model_pack(project, cat_map: Dict[str, CAT]=CAT_MAP) -> CAT:
model_pack_obj = project.model_pack
cat_id = 'mp' + str(model_pack_obj.id)
logger.info('Loading model pack from:%s', model_pack_obj.model_pack.path)
cat = CAT.load_model_pack(model_pack_obj.model_pack.path)
cat_map[cat_id] = cat
return cat


def get_medcat(project,
cdb_map: Dict[str, CDB]=CDB_MAP,
vocab_map: Dict[str, Vocab]=VOCAB_MAP,
cat_map: Dict[str, CAT]=CAT_MAP):
try:
if project.model_pack is None:
cat = get_medcat_from_cdb_vocab(project, cdb_map, vocab_map, cat_map)
else:
cat = get_medcat_from_model_pack(project, cat_map)
return cat
except AttributeError:
raise Exception('Failure loading Project ConceptDB, Vocab or Model Pack. Are these set correctly?')


def get_cached_medcat(project, cat_map: Dict[str, CAT]=CAT_MAP):
if project.concept_db is None or project.vocab is None:
return None
cdb_id = project.concept_db.id
vocab_id = project.vocab.id
cat_id = str(cdb_id) + "-" + str(vocab_id)
return cat_map.get(cat_id)


def clear_cached_medcat(project, cat_map: Dict[str, CAT]=CAT_MAP):
cdb_id = project.concept_db.id
vocab_id = project.vocab.id
cat_id = str(cdb_id) + "-" + str(vocab_id)
if cat_id in cat_map:
del cat_map[cat_id]


def get_cached_cdb(cdb_id: str, cdb_map: Dict[str, CDB]=CDB_MAP) -> CDB:
if cdb_id not in cdb_map:
cdb_obj = ConceptDB.objects.get(id=cdb_id)
cdb = CDB.load(cdb_obj.cdb_file.path)
cdb_map[cdb_id] = cdb
return cdb_map[cdb_id]


def clear_cached_cdb(cdb_id, cdb_map: Dict[str, CDB]=CDB_MAP):
if cdb_id in cdb_map:
del cdb_map[cdb_id]


def is_model_loaded(project,
cdb_map: Dict[str, CDB]=CDB_MAP,
cat_map: Dict[str, CAT]=CAT_MAP):
if project.concept_db is None:
# model pack is used.
return False if not project.model_pack else f'mp{project.model_pack.id}' in cat_map
else:
return False if not project.concept_db else project.concept_db.id in cdb_map
Loading

0 comments on commit 757a903

Please sign in to comment.