Skip to content

Commit

Permalink
Merge pull request #249 from MetaCell/feature/SCKAN-274
Browse files Browse the repository at this point in the history
Feature/sckan 274
  • Loading branch information
afonsobspinto authored Mar 19, 2024
2 parents 3a774a9 + 12c299a commit b959c8c
Show file tree
Hide file tree
Showing 9 changed files with 33,915 additions and 33,711 deletions.
8 changes: 7 additions & 1 deletion backend/composer/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Tag,
Via,
FunctionalCircuitRole,
ProjectionPhenotype, Destination
ProjectionPhenotype, Destination, Synonym
)


Expand Down Expand Up @@ -89,10 +89,16 @@ class SentenceAdmin(
)


class SynonymInline(admin.TabularInline):
    """Tabular inline that edits ``Synonym`` rows on a parent admin page."""

    model = Synonym
    # One blank row is offered for adding a new synonym.
    extra = 1


class AnatomicalEntityAdmin(admin.ModelAdmin):
    """Admin for anatomical entities; their synonyms are edited inline."""

    list_display = ("name", "ontology_uri")
    # Every displayed column links to the change form.
    list_display_links = list_display
    # Use ("^name",) instead to match only from the start of the name.
    search_fields = ("name",)
    inlines = [SynonymInline]


class ViaInline(SortableStackedInline):
Expand Down
123 changes: 75 additions & 48 deletions backend/composer/management/commands/ingest_anatomical_entities.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,91 @@
import csv

from django.core.management.base import BaseCommand

from composer.models import AnatomicalEntity
import time
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.db.utils import IntegrityError
from composer.models import AnatomicalEntity, Synonym

# Header names of the CSV columns read from every row.
URI = "o"  # column holding the entity's ontology URI
NAME = "o_label"  # column holding the entity's name
SYNONYM = "o_synonym"  # column holding one synonym; may be empty
# Number of pending Synonym objects that triggers a bulk insert.
BULK_LIMIT = 100


class Command(BaseCommand):
    """Load anatomical entities and their synonyms from one or more CSV files.

    Every row is read through the ``URI``, ``NAME`` and ``SYNONYM`` columns.
    Entities are keyed by ontology URI; synonyms are collected in memory and
    written with ``bulk_create`` once ``BULK_LIMIT`` of them are pending.
    The whole run executes inside a single transaction.
    """

    help = "Ingests Anatomical Entities CSV file(s)."

    def add_arguments(self, parser):
        """Register the positional CSV paths and the verbose-logging flag."""
        parser.add_argument("csv_files", nargs="+", type=str)
        parser.add_argument("--show_complete_logs", action='store_true',
                            help="Show detailed logs during processing")

    def _process_anatomical_entity(self, name, ontology_uri, synonym, show_complete_logs, processed_uris,
                                   unique_synonyms):
        """Create or update the entity for one CSV row and queue its synonym.

        Args:
            name: Entity name taken from the row.
            ontology_uri: Ontology URI used as the lookup key for the entity.
            synonym: Synonym taken from the row, or ``None`` when absent.
            show_complete_logs: Whether to log every rename / queued synonym.
            processed_uris: Set of URIs already handled in this run; updated
                in place.
            unique_synonyms: Dict of pending ``Synonym`` objects keyed by
                ``(ontology_uri, synonym.lower())``; updated in place.

        An ``IntegrityError`` is reported on stdout and the row is skipped.
        """
        is_first_occurrence = ontology_uri not in processed_uris
        synonym_key = (ontology_uri, synonym.lower()) if synonym else None
        renamed = False
        queue_synonym = False

        try:
            # The caller runs inside an atomic block. A nested atomic block
            # creates a savepoint, so a failing statement is rolled back on
            # its own and the outer transaction remains usable afterwards.
            with transaction.atomic():
                anatomical_entity, created = AnatomicalEntity.objects.get_or_create(
                    ontology_uri=ontology_uri,
                    defaults={"name": name},
                )
                # Only the first row seen for a URI may rename an existing entity.
                if not created and is_first_occurrence and anatomical_entity.name != name:
                    anatomical_entity.name = name
                    anatomical_entity.save()
                    renamed = True

                if synonym and synonym_key not in unique_synonyms:
                    # Skip synonyms already stored (case-insensitively) for this entity.
                    queue_synonym = not Synonym.objects.filter(
                        anatomical_entity=anatomical_entity, name__iexact=synonym
                    ).exists()
        except IntegrityError as e:
            self.stdout.write(self.style.ERROR(f"Error processing {ontology_uri}: {e}"))
            return

        processed_uris.add(ontology_uri)

        if renamed and show_complete_logs:
            self.stdout.write(
                self.style.SUCCESS(f"Updated {anatomical_entity.ontology_uri} name to {name}.")
            )

        if queue_synonym:
            unique_synonyms[synonym_key] = Synonym(anatomical_entity=anatomical_entity, name=synonym)
            if show_complete_logs:
                self.stdout.write(
                    self.style.SUCCESS(f"Synonym '{synonym}' added for {anatomical_entity.ontology_uri}."))

    def _flush_synonyms(self, unique_synonyms):
        """Bulk-insert the pending synonyms inside a savepoint, then empty the dict."""
        with transaction.atomic():
            Synonym.objects.bulk_create(list(unique_synonyms.values()), ignore_conflicts=True)
        unique_synonyms.clear()

    @transaction.atomic
    def handle(self, *args, **options):
        """Ingest every CSV file given on the command line.

        A missing file or an error while reading a file is reported on stdout
        and processing continues with the next file.
        """
        # Monotonic clock: the reported duration cannot be skewed by
        # wall-clock adjustments.
        start_time = time.monotonic()
        show_complete_logs = options['show_complete_logs']
        unique_synonyms = {}
        processed_uris = set()

        for csv_file in options["csv_files"]:
            try:
                with open(csv_file, newline="", encoding="utf-8", errors="ignore") as csvfile:
                    reader = csv.DictReader(csvfile, delimiter=",", quotechar='"')
                    for current_line, row in enumerate(reader, start=1):
                        if current_line % 100 == 0:
                            self.stdout.write(self.style.NOTICE(f"Processing line {current_line}"))

                        ontology_uri = row[URI].strip()
                        name = row[NAME].strip()
                        synonym = row[SYNONYM].strip() if row[SYNONYM] else None

                        self._process_anatomical_entity(name, ontology_uri, synonym, show_complete_logs,
                                                        processed_uris, unique_synonyms)

                        if len(unique_synonyms) >= BULK_LIMIT:
                            self._flush_synonyms(unique_synonyms)

            except FileNotFoundError:
                self.stdout.write(self.style.ERROR(f"File {csv_file} does not exist."))
            except Exception as e:
                self.stdout.write(self.style.ERROR(f"An error occurred while processing {csv_file}: {e}"))

        # Ensure any remaining synonyms are created
        if unique_synonyms:
            try:
                self._flush_synonyms(unique_synonyms)
            except Exception as e:
                self.stdout.write(self.style.ERROR(f"An error occurred during bulk creation: {e}"))

        elapsed = time.monotonic() - start_time
        self.stdout.write(self.style.SUCCESS(f"Operation completed in {elapsed:.2f} seconds."))
36 changes: 36 additions & 0 deletions backend/composer/migrations/0041_synonym.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Generated by Django 4.1.4 on 2024-03-13 17:15

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create the ``Synonym`` table, linked to ``AnatomicalEntity``."""

    dependencies = [("composer", "0040_auto_20240213_1301")]

    operations = [
        migrations.CreateModel(
            name="Synonym",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                ("name", models.CharField(db_index=True, max_length=200)),
                (
                    "anatomical_entity",
                    # Deleting an entity deletes its synonyms as well.
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="synonyms",
                        to="composer.anatomicalentity",
                    ),
                ),
            ],
        ),
    ]
58 changes: 58 additions & 0 deletions backend/composer/migrations/0042_auto_20240313_1718.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Generated by Django 4.1.4 on 2024-03-13 17:18

from django.db import migrations


def deduplicate_anatomical_entities(apps, schema_editor):
    """Merge anatomical entities that share an ontology URI.

    Entities are visited in ascending ``id`` order. The first entity seen for
    a URI is kept as the primary one; every later entity with the same URI is
    recorded as a ``Synonym`` of the primary, has its many-to-many references
    re-pointed to the primary, and is then deleted.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Schema editor supplied by ``RunPython`` (unused).
    """
    AnatomicalEntity = apps.get_model('composer', 'AnatomicalEntity')
    Synonym = apps.get_model('composer', 'Synonym')
    ConnectivityStatement = apps.get_model('composer', 'ConnectivityStatement')
    Destination = apps.get_model('composer', 'Destination')
    Via = apps.get_model('composer', 'Via')

    # (model, many-to-many field name) pairs that can reference an entity.
    entity_relations = (
        (ConnectivityStatement, 'origins'),
        (Destination, 'anatomical_entities'),
        (Destination, 'from_entities'),
        (Via, 'anatomical_entities'),
        (Via, 'from_entities'),
    )

    # Maps ontology_uri -> the entity kept for that URI. Storing the instance
    # itself avoids one extra query per duplicate.
    primary_by_uri = {}

    for entity in AnatomicalEntity.objects.all().order_by('id'):
        primary_entity = primary_by_uri.setdefault(entity.ontology_uri, entity)
        if primary_entity is entity:
            # First entity seen for this URI: it becomes the primary.
            continue

        # Keep the duplicate's name as a synonym. get_or_create avoids a
        # repeated (entity, name) pair, which a later migration makes unique.
        Synonym.objects.get_or_create(anatomical_entity=primary_entity, name=entity.name)

        # Re-point every many-to-many reference from the duplicate to the primary.
        for model, field_name in entity_relations:
            for holder in model.objects.filter(**{field_name: entity}):
                related = getattr(holder, field_name)
                related.remove(entity)
                related.add(primary_entity)

        # Finally, delete the duplicate entity
        entity.delete()


class Migration(migrations.Migration):
    """Data migration that merges anatomical entities sharing an ontology URI."""

    dependencies = [
        ("composer", "0041_synonym"),
    ]

    operations = [
        # The merge cannot be undone, so the reverse step does nothing.
        # Supplying it keeps this migration (and those after it) unappliable.
        migrations.RunPython(
            deduplicate_anatomical_entities,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.1.4 on 2024-03-13 17:30

from django.db import migrations, models


class Migration(migrations.Migration):
    """Make ``AnatomicalEntity.ontology_uri`` unique."""

    dependencies = [("composer", "0042_auto_20240313_1718")]

    operations = [
        migrations.AlterField(
            model_name="anatomicalentity",
            name="ontology_uri",
            field=models.URLField(unique=True),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 4.1.4 on 2024-03-13 18:49

from django.db import migrations


class Migration(migrations.Migration):
    """Drop the ``ae_unique_upper_name`` constraint from ``AnatomicalEntity``."""

    dependencies = [("composer", "0043_alter_anatomicalentity_ontology_uri")]

    operations = [
        migrations.RemoveConstraint(model_name="anatomicalentity", name="ae_unique_upper_name"),
    ]
16 changes: 16 additions & 0 deletions backend/composer/migrations/0045_alter_synonym_unique_together.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 4.1.4 on 2024-03-18 17:57

from django.db import migrations


class Migration(migrations.Migration):
    """Require each (anatomical_entity, name) pair in ``Synonym`` to be unique."""

    dependencies = [("composer", "0044_remove_anatomicalentity_ae_unique_upper_name")]

    operations = [
        migrations.AlterUniqueTogether(
            name="synonym",
            unique_together={("anatomical_entity", "name")},
        ),
    ]
13 changes: 9 additions & 4 deletions backend/composer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,17 +229,22 @@ class AnatomicalEntity(models.Model):
"""Anatomical Entity"""

name = models.CharField(max_length=200, db_index=True)
ontology_uri = models.URLField()
ontology_uri = models.URLField(unique=True)

def __str__(self):
return self.name

class Meta:
ordering = ["name"]
verbose_name_plural = "Anatomical Entities"
constraints = [
models.UniqueConstraint(Upper("name"), name="ae_unique_upper_name")
]


class Synonym(models.Model):
    """Alternative name for an anatomical entity"""

    # Deleting the entity deletes its synonyms; reachable as ``entity.synonyms``.
    anatomical_entity = models.ForeignKey(AnatomicalEntity, on_delete=models.CASCADE, related_name="synonyms")
    name = models.CharField(max_length=200, db_index=True)

    def __str__(self):
        # Gives the admin and shell a readable label instead of the default
        # "Synonym object (n)".
        return self.name

    class Meta:
        # A given name may be attached to the same entity only once.
        unique_together = ('anatomical_entity', 'name',)


class Tag(models.Model):
Expand Down
Loading

0 comments on commit b959c8c

Please sign in to comment.