Skip to content

Commit

Permalink
SCKAN-274 feat: Add constraint unique together to synonym
Browse files Browse the repository at this point in the history
  • Loading branch information
afonsobspinto committed Mar 18, 2024
1 parent ea4a055 commit 12c299a
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 20 deletions.
40 changes: 23 additions & 17 deletions backend/composer/management/commands/ingest_anatomical_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@


class Command(BaseCommand):
help = "Ingests Anatomical Entities CSV file(s), with robust error handling."
help = "Ingests Anatomical Entities CSV file(s)."

def add_arguments(self, parser):
    """Register the command-line arguments accepted by this command.

    ``csv_files`` is a positional argument taking one or more string
    values; ``--show_complete_logs`` is an opt-in boolean flag whose help
    text describes it as showing detailed logs during processing.
    """
    register = parser.add_argument

    # Positional: at least one CSV path is required.
    register("csv_files", type=str, nargs="+")

    # Optional switch: defaults to False, True when the flag is present.
    register(
        "--show_complete_logs",
        help="Show detailed logs during processing",
        action="store_true",
    )

def _process_anatomical_entity(self, name, ontology_uri, synonym, show_complete_logs, processed_uris,
synonym_accumulator):
unique_synonyms):
try:
is_first_occurrence = ontology_uri not in processed_uris

Expand All @@ -34,26 +34,29 @@ def _process_anatomical_entity(self, name, ontology_uri, synonym, show_complete_
anatomical_entity.save()
if show_complete_logs:
self.stdout.write(
self.style.SUCCESS(f"Updated {anatomical_entity.ontology_uri} name to {name}."))
self.style.SUCCESS(f"Updated {anatomical_entity.ontology_uri} name to {name}.")
)

processed_uris.add(ontology_uri)

if synonym and synonym.lower() not in [s.alias.lower() for s in anatomical_entity.synonyms.all()]:
synonym_accumulator.append(Synonym(anatomical_entity=anatomical_entity, alias=synonym))
if show_complete_logs:
self.stdout.write(
self.style.SUCCESS(f"Synonym '{synonym}' added for {anatomical_entity.ontology_uri}."))
synonym_key = (ontology_uri, synonym.lower()) if synonym else None
if synonym and synonym_key not in unique_synonyms:
if not Synonym.objects.filter(anatomical_entity=anatomical_entity, name__iexact=synonym).exists():
unique_synonyms[synonym_key] = Synonym(anatomical_entity=anatomical_entity, name=synonym)
if show_complete_logs:
self.stdout.write(
self.style.SUCCESS(f"Synonym '{synonym}' added for {anatomical_entity.ontology_uri}."))
except IntegrityError as e:
self.stdout.write(self.style.ERROR(f"Error processing {ontology_uri}: {e}"))

@transaction.atomic
def handle(self, *args, **options):
start_time = time.time()
show_complete_logs = options['show_complete_logs']
synonym_accumulator = []
unique_synonyms = {}
processed_uris = set()

for csv_file in options["csv_files"]:
processed_uris = set()
try:
with open(csv_file, newline="", encoding="utf-8", errors="ignore") as csvfile:
reader = csv.DictReader(csvfile, delimiter=",", quotechar='"')
Expand All @@ -65,21 +68,24 @@ def handle(self, *args, **options):
name = row[NAME].strip()
synonym = row[SYNONYM].strip() if row[SYNONYM] else None

self._process_anatomical_entity(name, ontology_uri, synonym, show_complete_logs,
processed_uris, synonym_accumulator)
self._process_anatomical_entity(name, ontology_uri, synonym, show_complete_logs, processed_uris,
unique_synonyms)

if len(synonym_accumulator) >= BULK_LIMIT:
Synonym.objects.bulk_create(synonym_accumulator)
synonym_accumulator = []
if len(unique_synonyms) >= BULK_LIMIT:
Synonym.objects.bulk_create(unique_synonyms.values(), ignore_conflicts=True)
unique_synonyms.clear()

except FileNotFoundError:
self.stdout.write(self.style.ERROR(f"File {csv_file} does not exist."))
except Exception as e:
self.stdout.write(self.style.ERROR(f"An error occurred while processing {csv_file}: {e}"))

# Ensure any remaining synonyms are created
if synonym_accumulator:
Synonym.objects.bulk_create(synonym_accumulator)
if unique_synonyms:
try:
Synonym.objects.bulk_create(unique_synonyms.values(), ignore_conflicts=True)
except Exception as e:
self.stdout.write(self.style.ERROR(f"An error occurred during bulk creation: {e}"))

end_time = time.time()
self.stdout.write(self.style.SUCCESS(f"Operation completed in {end_time - start_time:.2f} seconds."))
2 changes: 1 addition & 1 deletion backend/composer/migrations/0041_synonym.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Migration(migrations.Migration):
verbose_name="ID",
),
),
("alias", models.CharField(db_index=True, max_length=200)),
("name", models.CharField(db_index=True, max_length=200)),
(
"anatomical_entity",
models.ForeignKey(
Expand Down
2 changes: 1 addition & 1 deletion backend/composer/migrations/0042_auto_20240313_1718.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def deduplicate_anatomical_entities(apps, schema_editor):
# If the ontology_uri is a duplicate, move this entity to Synonym
primary_entity = AnatomicalEntity.objects.get(id=primary_anatomical_entities[entity.ontology_uri])
# Create a synonym for the duplicate entity
Synonym.objects.create(anatomical_entity=primary_entity, alias=entity.name)
Synonym.objects.create(anatomical_entity=primary_entity, name=entity.name)

# Update ConnectivityStatement origins to point to the primary entity
for cs in ConnectivityStatement.objects.filter(origins=entity):
Expand Down
16 changes: 16 additions & 0 deletions backend/composer/migrations/0045_alter_synonym_unique_together.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 4.1.4 on 2024-03-18 17:57

from django.db import migrations


class Migration(migrations.Migration):
    """Set a unique-together rule on (anatomical_entity, name) for ``synonym``."""

    # Ordered after migration 0044 of the "composer" app.
    dependencies = [
        ("composer", "0044_remove_anatomicalentity_ae_unique_upper_name"),
    ]

    operations = [
        # Declares the pair (anatomical_entity, name) as unique together on the
        # "synonym" model.
        # NOTE(review): presumably this cannot be applied while duplicate
        # (anatomical_entity, name) rows exist — confirm the data is
        # de-duplicated before running it.
        migrations.AlterUniqueTogether(
            name="synonym",
            unique_together={("anatomical_entity", "name")},
        ),
    ]
5 changes: 4 additions & 1 deletion backend/composer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ class Meta:

class Synonym(models.Model):
anatomical_entity = models.ForeignKey(AnatomicalEntity, on_delete=models.CASCADE, related_name="synonyms")
alias = models.CharField(max_length=200, db_index=True)
name = models.CharField(max_length=200, db_index=True)

class Meta:
unique_together = ('anatomical_entity', 'name',)


class Tag(models.Model):
Expand Down

0 comments on commit 12c299a

Please sign in to comment.