Skip to content

Commit

Permalink
Merge pull request #1655 from Princeton-CDH/feature/1641-places-metad…
Browse files Browse the repository at this point in the history
…ata-export

Add admin metadata export for Places (#1641)
  • Loading branch information
blms authored Oct 17, 2024
2 parents 180de45 + 541ad8b commit 481e6e6
Show file tree
Hide file tree
Showing 6 changed files with 292 additions and 6 deletions.
23 changes: 21 additions & 2 deletions geniza/entities/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@
PlacePersonForm,
PlacePlaceForm,
)
from geniza.entities.metadata_export import AdminPersonExporter
from geniza.entities.metadata_export import AdminPersonExporter, AdminPlaceExporter
from geniza.entities.models import (
DocumentPlaceRelation,
DocumentPlaceRelationType,
Event,
Name,
PastPersonSlug,
Person,
PersonDocumentRelation,
PersonDocumentRelationType,
Expand Down Expand Up @@ -526,6 +525,26 @@ def get_queryset(self, request):
)
)

@admin.action(description="Export selected places to CSV")
def export_to_csv(self, request, queryset=None):
    """Stream tabular place data as a CSV file.

    Used both as a bulk admin action (``queryset`` holds the selection)
    and as a standalone admin view (``queryset`` omitted), in which case
    the admin's own queryset for the request is exported.

    :param request: the current HTTP request
    :param queryset: optional queryset of places to export
    :return: the response built by the exporter's ``http_export_data_csv``
    """
    # Compare against None instead of relying on truthiness: evaluating a
    # QuerySet in a boolean context executes the query and loads every row,
    # and would also turn an explicitly empty selection into "export all".
    if queryset is None:
        queryset = self.get_queryset(request)
    exporter = AdminPlaceExporter(queryset=queryset, progress=False)
    return exporter.http_export_data_csv()

def get_urls(self):
    """Return admin urls, with the custom CSV export URL listed first."""
    # wrap the export in admin_view before exposing it as a URL
    csv_view = self.admin_site.admin_view(self.export_to_csv)
    custom_urls = [path("csv/", csv_view, name="place-csv")]
    return custom_urls + super().get_urls()

# register the CSV export as a bulk action for this admin
actions = (export_to_csv,)


@admin.register(PlacePlaceRelationType)
class PlacePlaceRelationTypeAdmin(TabbedTranslationAdmin, admin.ModelAdmin):
Expand Down
173 changes: 172 additions & 1 deletion geniza/entities/metadata_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
from geniza.corpus.dates import standard_date_display
from geniza.corpus.models import Document
from geniza.entities.models import (
DocumentPlaceRelation,
DocumentPlaceRelationType,
Event,
Person,
PersonDocumentRelation,
PersonPlaceRelation,
PersonPlaceRelationType,
Place,
PlaceEventRelation,
)


Expand Down Expand Up @@ -52,7 +56,7 @@ class PublicPersonExporter(Exporter):
}

def __init__(self, queryset=None, progress=False):
"""Adds fields to the CSV based on PersonPlaceRelationType names"""
"""Adds fields to the export based on PersonPlaceRelationType names"""
self.csv_fields[9:9] = [
slugify(ppr_type.name).replace("-", "_")
for ppr_type in PersonPlaceRelationType.objects.order_by("name")
Expand Down Expand Up @@ -154,3 +158,170 @@ def get_export_data_dict(self, person):
] = f"{self.url_scheme}{self.site_domain}/admin/entities/person/{person.id}/change/"

return outd


class PublicPlaceExporter(Exporter):
    """
    A subclass of :class:`geniza.common.metadata_export.Exporter` that
    exports information relating to :class:`~geniza.entities.models.Place`.
    Extends :meth:`get_queryset` and :meth:`get_export_data_dict`.
    """

    model = Place
    csv_fields = [
        "name",
        "name_variants",
        "coordinates",
        "notes",
        "url",
    ]

    # queryset filter for content types included in this export
    content_type_filter = {
        "content_type__app_label__in": ["entities", "corpus"],
        "content_type__model__in": [
            "Document",
            "DocumentPlaceRelation",
            "DocumentPlaceRelationType",
            "Name",
            "Person",
            "PersonPlaceRelation",
            "PersonPlaceRelationType",
            "Place",
        ],
    }

    def get_queryset(self):
        """
        Applies some prefetching to the base Exporter's get_queryset functionality.

        :return: Custom-given query set or query set of all places
        :rtype: QuerySet
        """
        # Test against None rather than truthiness: a QuerySet in a boolean
        # context is evaluated, which would run the full query once here and
        # again after prefetch_related() clones it below.
        qset = self.queryset if self.queryset is not None else self.model.objects.all()
        # clear existing prefetches and then add the ones we need
        qset = (
            qset.prefetch_related(None)
            .prefetch_related(
                "names",
                "events",
                Prefetch(
                    "personplacerelation_set",
                    queryset=PersonPlaceRelation.objects.select_related("type"),
                ),
                Prefetch(
                    "documentplacerelation_set",
                    queryset=DocumentPlaceRelation.objects.select_related("type"),
                ),
            )
            .order_by("slug")
        )
        return qset

    def get_export_data_dict(self, place):
        """
        Get back data about a place in dictionary format.

        :param place: A given Place object
        :type place: Place
        :return: Dictionary of data about the place
        :rtype: dict
        """
        outd = {
            "name": str(place),
            # non-primary names, alphabetized and comma-separated
            "name_variants": ", ".join(
                sorted(n.name for n in place.names.non_primary())
            ),
            "coordinates": place.coordinates,
            "notes": place.notes,
            "url": place.permalink,
        }

        return outd


class AdminPlaceExporter(PublicPlaceExporter):
    """
    Admin variant of :class:`PublicPlaceExporter`. Adds one column per
    person-place and document-place relation type, a column of related
    events, and the admin change URL for each place.
    """

    csv_fields = PublicPlaceExporter.csv_fields + [
        "events",
        "url_admin",
    ]

    def __init__(self, queryset=None, progress=False):
        """Adds fields to the export based on relation type names"""
        rel_types = [
            ("people", PersonPlaceRelationType.objects.order_by("name")),
            ("documents", DocumentPlaceRelationType.objects.order_by("name")),
        ]
        relation_fields = [
            slugify(rel_type.name).replace("-", "_") + f"_{rel_class}"
            for (rel_class, rts) in rel_types
            for rel_type in rts
        ]
        # Build a per-instance list instead of slice-assigning into
        # self.csv_fields: that attribute resolves to the class-level list,
        # so mutating it in place would insert the relation columns again
        # for every exporter instantiated in the same process.
        class_fields = type(self).csv_fields
        # relation columns go right after the public fields
        insert_at = len(PublicPlaceExporter.csv_fields)
        self.csv_fields = (
            class_fields[:insert_at] + relation_fields + class_fields[insert_at:]
        )
        super().__init__(queryset, progress)

    @staticmethod
    def _names_by_relation_type(relations, related_field, related_model):
        """
        Group the objects linked through ``relations`` by relation type name.

        :param relations: queryset of relation objects for a single place
        :param related_field: name of the foreign key on the relation that
            points at the related object (e.g. ``"person"``)
        :param related_model: model class that ``related_field`` points to
        :return: dict mapping each relation type name to a comma-separated,
            alphabetically sorted string of the related objects' names
        :rtype: dict
        """
        # collect distinct related object ids per type name in a single query
        ids_by_type = {}
        for type_name, obj_id in relations.values_list(
            "type__name", f"{related_field}__id"
        ):
            ids_by_type.setdefault(type_name, set()).add(obj_id)
        return {
            type_name: ", ".join(
                sorted(str(obj) for obj in related_model.objects.filter(id__in=ids))
            )
            for type_name, ids in ids_by_type.items()
        }

    def get_export_data_dict(self, place):
        """
        Adding certain fields to PublicPlaceExporter.get_export_data_dict that are admin-only.

        :param place: A given Place object
        :type place: Place
        :return: Dictionary of data about the place, including admin-only fields
        :rtype: dict
        """
        outd = super().get_export_data_dict(place)

        # group related people and documents by relation type name; keys
        # mirror the column names generated in __init__
        related = [
            ("people", PersonPlaceRelation, "person", Person),
            ("documents", DocumentPlaceRelation, "document", Document),
        ]
        for rel_class, relation_model, related_field, related_model in related:
            relations = relation_model.objects.filter(place__id=place.id)
            names_by_type = self._names_by_relation_type(
                relations, related_field, related_model
            )
            for type_name, names in names_by_type.items():
                tn = slugify(type_name).replace("-", "_")
                outd[f"{tn}_{rel_class}"] = names

        # get names of related events and set on output dict
        related_event_ids = (
            PlaceEventRelation.objects.filter(place__id=place.id)
            .values_list("event__id", flat=True)
            .distinct()
        )
        related_events = Event.objects.filter(id__in=related_event_ids)
        outd["events"] = ", ".join(
            [e.name for e in related_events.order_by("standard_date", "name")]
        )

        # add admin url
        outd[
            "url_admin"
        ] = f"{self.url_scheme}{self.site_domain}/admin/entities/place/{place.id}/change/"

        return outd
2 changes: 1 addition & 1 deletion geniza/entities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,7 @@ def related_delete(sender, instance=None, raw=False, **_kwargs):
PlaceSignalHandlers.related_change(instance, raw, "delete")


class Place(ModelIndexable, SlugMixin):
class Place(ModelIndexable, SlugMixin, PermalinkMixin):
"""A named geographical location, which may be associated with documents or people."""

names = GenericRelation(Name, related_query_name="place")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{% extends 'admin/change_list.html' %}

{% block object-tools-items %}
<li><a href="{% url 'admin:place-csv' %}" class="">Download all as CSV</a></li>
{{ block.super }}
{% endblock %}
23 changes: 23 additions & 0 deletions geniza/entities/tests/test_entities_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,26 @@ def test_get_queryset(self):
# queryset should include name_unaccented field without diacritics
qs = place_admin.get_queryset(Mock())
assert qs.filter(name_unaccented__icontains="fustat").exists()

@pytest.mark.django_db
def test_export_to_csv(self):
    """The admin CSV export should stream a header row and place data."""
    # adapted from document csv export tests
    mosul = Place.objects.create(slug="mosul", notes="A city in Iraq")
    Name.objects.create(content_object=mosul, name="Mosul", primary=True)
    fustat = Place.objects.create(slug="fustat")
    Name.objects.create(content_object=fustat, name="Fusṭāṭ", primary=True)

    admin_instance = PlaceAdmin(model=Place, admin_site=admin.site)
    response = admin_instance.export_to_csv(Mock())
    assert isinstance(response, StreamingHttpResponse)

    # drain the byte stream and decode it so it can be inspected as text
    content = b"".join(response.streaming_content).decode()

    # header row
    assert "name,name_variants," in content
    # spot-check some of the exported values
    for expected in (str(mosul), mosul.notes, str(fustat), fustat.permalink):
        assert expected in content
71 changes: 69 additions & 2 deletions geniza/entities/tests/test_entities_metadata_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,19 @@
import pytest
from django.utils import timezone

from geniza.entities.metadata_export import AdminPersonExporter
from geniza.entities.metadata_export import AdminPersonExporter, AdminPlaceExporter
from geniza.entities.models import (
DocumentPlaceRelation,
DocumentPlaceRelationType,
Event,
Name,
Person,
PersonPersonRelation,
PersonPersonRelationType,
PersonPlaceRelation,
PersonPlaceRelationType,
Place,
PlaceEventRelation,
)

# adapted from corpus/tests/test_metadata_export.py
Expand Down Expand Up @@ -57,7 +61,7 @@ def test_person_exporter_cli(person, person_multiname):


@pytest.mark.django_db
def test_iter_dicts(person, person_diacritic, person_multiname, document, join):
def test_person_iter_dicts(person, person_diacritic, person_multiname, document, join):
# Create some relationships
person.has_page = True
person.save()
Expand Down Expand Up @@ -114,3 +118,66 @@ def test_iter_dicts(person, person_diacritic, person_multiname, document, join):
assert export_data.get("related_documents_count") == 0
# should be in alphabetical order
assert "Fusṭāṭ, Mosul" in export_data.get("family_traces_roots_to")


@pytest.mark.django_db
def test_place_iter_dicts(person, person_multiname, document, join):
    """Admin place export rows should include names, relations, events and URLs."""
    # two places: one with notes and a name variant, one with a primary name only
    mosul = Place.objects.create(slug="mosul", notes="A city in Iraq")
    Name.objects.create(content_object=mosul, name="Mosul", primary=True)
    Name.objects.create(content_object=mosul, name="الموصل", primary=False)
    fustat = Place.objects.create(slug="fustat")
    Name.objects.create(content_object=fustat, name="Fusṭāṭ", primary=True)

    # person-place relationships
    home_base, _ = PersonPlaceRelationType.objects.get_or_create(name_en="Home base")
    roots, _ = PersonPlaceRelationType.objects.get_or_create(
        name_en="Family traces roots to"
    )
    for related_person, related_place, relation_type in (
        (person, mosul, home_base),
        (person, fustat, roots),
        (person_multiname, fustat, roots),
    ):
        PersonPlaceRelation.objects.create(
            person=related_person, place=related_place, type=relation_type
        )

    # document-place relationships
    dest, _ = DocumentPlaceRelationType.objects.get_or_create(name="Destination")
    ment, _ = DocumentPlaceRelationType.objects.get_or_create(
        name="Possibly mentioned"
    )
    for related_place, relation_type, related_doc in (
        (fustat, dest, document),
        (fustat, dest, join),
        (mosul, ment, join),
    ):
        DocumentPlaceRelation.objects.create(
            place=related_place, type=relation_type, document=related_doc
        )

    # two events, both tied to fustat
    for event_name, event_date in (
        ("Somebody went to Fustat", "1000"),
        ("Somebody else went to Fustat", "1010"),
    ):
        event = Event.objects.create(name=event_name, standard_date=event_date)
        PlaceEventRelation.objects.create(place=fustat, event=event)

    # walk the places and the exported rows side by side
    places = Place.objects.all().order_by("slug")
    exporter = AdminPlaceExporter(queryset=places)

    for place, row in zip(places, exporter.iter_dicts()):
        place_name = str(place)
        assert place_name == row.get("name")
        for variant in place.names.non_primary():
            assert str(variant) in row.get("name_variants")
        assert (
            f"https://example.com/admin/entities/place/{place.id}/change/"
            == row.get("url_admin")
        )
        assert place.permalink == row.get("url")

        if place_name == str(fustat):
            # relation type names are snake-cased and suffixed with the
            # related object type (i.e. _people, _documents)
            for related_person in (person, person_multiname):
                assert str(related_person) in row.get("family_traces_roots_to_people")
            for related_doc in (document, join):
                assert str(related_doc) in row.get("destination_documents")
            for fragment in ("Somebody went", "Somebody else went"):
                assert fragment in row.get("events")
        elif place_name == str(mosul):
            assert "Iraq" in row.get("notes")
            assert str(person) in row.get("home_base_people")
            assert str(join) in row.get("possibly_mentioned_documents")

0 comments on commit 481e6e6

Please sign in to comment.