Skip to content

Commit

Permalink
Merge pull request #1497 from maykinmedia/task/2871-zgw-import-export
Browse files Browse the repository at this point in the history
[#2871] Refactor ZGW imports to work on different environments
  • Loading branch information
alextreme authored Nov 28, 2024
2 parents b534838 + 24fffb7 commit e589b3a
Show file tree
Hide file tree
Showing 9 changed files with 697 additions and 404 deletions.
26 changes: 23 additions & 3 deletions src/open_inwoner/openzaak/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from django.template.defaultfilters import filesizeformat
from django.template.response import TemplateResponse
from django.urls import path, reverse
from django.utils.html import format_html, format_html_join
from django.utils.translation import gettext_lazy as _, ngettext

from import_export.admin import ImportExportMixin
Expand Down Expand Up @@ -174,11 +175,30 @@ def process_file_view(self, request):
self.message_user(
request,
_(
"Successfully processed %(num_rows)d items"
% {"num_rows": import_result.total_rows_processed}
"%(num_rows)d item(s) processed in total, with %(error_rows)d failing row(s)."
% {
"num_rows": import_result.total_rows_processed,
"error_rows": len(import_result.import_errors),
}
),
messages.SUCCESS,
messages.SUCCESS
if not import_result.import_errors
else messages.WARNING,
)
if errors := import_result.import_errors:
msgs_deduped = set(error.__str__() for error in errors)
error_msg_iterator = ([msg] for msg in msgs_deduped)

error_msg_html = format_html_join(
"\n", "<li>{}</li>", error_msg_iterator
)
error_msg_html_ordered = format_html(
_("It was not possible to import the following items:")
+ f"<ol> {error_msg_html} </ol>"
)
self.message_user(
request, error_msg_html_ordered, messages.ERROR
)

return HttpResponseRedirect(
reverse(
Expand Down
264 changes: 197 additions & 67 deletions src/open_inwoner/openzaak/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
from typing import IO, Any, Generator, Self
from urllib.parse import urlparse

from django.apps import apps
from django.core import serializers
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
from django.core.files.storage import Storage
from django.core.serializers.base import DeserializationError
from django.db import transaction
from django.db.models import QuerySet

Expand All @@ -22,6 +25,132 @@
logger = logging.getLogger(__name__)


class ZGWImportError(Exception):
@classmethod
def extract_error_data(cls, exc: Exception, jsonl: str):
exc_source = type(exc.__context__)
data = json.loads(jsonl) if jsonl else {}
source_config = apps.get_model(data["model"])

# error type
if exc_source is CatalogusConfig.DoesNotExist or source_config.DoesNotExist:
error_type = ObjectDoesNotExist
if exc_source is source_config.MultipleObjectsReturned:
error_type = MultipleObjectsReturned

# metadata about source_config
items = []
fields = data.get("fields", None)
if source_config is CatalogusConfig:
items = [
f"Domein = {fields['domein']}",
f"Rsin = {fields['rsin']}",
]
if source_config is ZaakTypeConfig:
items = [
f"Identificatie = {fields['identificatie']}",
f"Catalogus domein = {fields['catalogus'][0]}",
f"Catalogus rsin = {fields['catalogus'][1]}",
]
if source_config in {
ZaakTypeStatusTypeConfig,
ZaakTypeResultaatTypeConfig,
ZaakTypeInformatieObjectTypeConfig,
}:
items = [
f"omschrijving = {fields['omschrijving']}",
f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]}",
f"Catalogus domein = {fields['zaaktype_config'][1]}",
f"Catalogus rsin = {fields['zaaktype_config'][2]}",
]

return {
"error_type": error_type,
"source_config_name": source_config.__name__,
"info": ", ".join(items),
}

@classmethod
def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:
error_data = cls.extract_error_data(exception, jsonl)

error_template = (
"%(source_config_name)s not found in target environment: %(info)s"
)
if error_data["error_type"] is MultipleObjectsReturned:
error_template = "Got multiple results for %(source_config_name)s: %(info)s"

return cls(error_template % error_data)


def check_catalogus_config_exists(source_config: CatalogusConfig, jsonl: str):
try:
CatalogusConfig.objects.get_by_natural_key(
domein=source_config.domein, rsin=source_config.rsin
)
except CatalogusConfig.MultipleObjectsReturned as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
except CatalogusConfig.DoesNotExist as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)


def _update_config(source, target, exclude_fields):
for field in source._meta.fields:
field_name = field.name

if field_name in exclude_fields:
continue

val = getattr(source, field_name, None)
setattr(target, field_name, val)
target.save()


def _update_zaaktype_config(source_config: ZaakTypeConfig, jsonl: str):
try:
target = ZaakTypeConfig.objects.get_by_natural_key(
identificatie=source_config.identificatie,
catalogus_domein=source_config.catalogus.domein,
catalogus_rsin=source_config.catalogus.rsin,
)
except ZaakTypeConfig.MultipleObjectsReturned as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
except (CatalogusConfig.DoesNotExist, ZaakTypeConfig.DoesNotExist) as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
else:
exclude_fields = [
"id",
"catalogus",
"urls",
"zaaktype_uuids",
]
_update_config(source_config, target, exclude_fields)


def _update_nested_zgw_config(
source_config: ZaakTypeStatusTypeConfig
| ZaakTypeResultaatTypeConfig
| ZaakTypeInformatieObjectTypeConfig,
exclude_fields: list[str],
jsonl: str,
):
zaaktype_config_identificatie = source_config.zaaktype_config.identificatie
catalogus_domein = source_config.zaaktype_config.catalogus.domein
catalogus_rsin = source_config.zaaktype_config.catalogus.rsin

try:
target = source_config.__class__.objects.get_by_natural_key(
omschrijving=source_config.omschrijving,
zaaktype_config_identificatie=zaaktype_config_identificatie,
catalogus_domein=catalogus_domein,
catalogus_rsin=catalogus_rsin,
)
except (source_config.DoesNotExist, source_config.MultipleObjectsReturned) as exc:
raise ZGWImportError.from_exception_and_jsonl(exc, jsonl)
else:
_update_config(source_config, target, exclude_fields)


@dataclasses.dataclass(frozen=True)
class CatalogusConfigExport:
"""Gather and export CatalogusConfig(s) and all associated relations."""
Expand Down Expand Up @@ -113,9 +242,10 @@ class CatalogusConfigImport:
total_rows_processed: int = 0
catalogus_configs_imported: int = 0
zaaktype_configs_imported: int = 0
zaak_inormatie_object_type_configs_imported: int = 0
zaak_informatie_object_type_configs_imported: int = 0
zaak_status_type_configs_imported: int = 0
zaak_resultaat_type_configs_imported: int = 0
import_errors: list | None = None

@staticmethod
def _get_url_root(url: str) -> str:
Expand Down Expand Up @@ -149,85 +279,85 @@ def _lines_iter_from_jsonl_stream_or_string(
# Reset the stream in case it gets re-used
lines.seek(0)

@classmethod
def _rewrite_jsonl_url_references(
cls, stream_or_string: IO | str
) -> Generator[str, Any, None]:
# The assumption is that the exporting and importing instance both have
# a `Service` with the same slug as the `Service` referenced in the
# `configued_from` attribute of the imported CatalogusConfig. The
# assumption is further that all URLs in the imported objects are
# prefixed by an URL that matches the API root in the service. Because
# of this, the import file will contain URLs with a base URL pointing to
# the `api_root`` of the `configured_from` Service on the _source_
# instance, and has to be re-written to match the `api_root` of the
# `configured_from` Service on the _target_ instance. Put differently,
# we assume that we are migrating ZGW objects that _do not differ_ as
# far as the ZGW objects themselves are concerned (apart from the URL,
# they essentially point to the same ZGW backend), but that they _do_
# differ in terms of additional model fields that do not have their
# source of truth in the ZGW backends.
#
# This expectation is also encoded in our API clients: you can only
# fetch ZGW objects using the ApePie clients if the root of those
# objects matches the configured API root.

base_url_mapping = {}
for deserialized_object in serializers.deserialize(
"jsonl",
filter(
lambda row: ('"model": "openzaak.catalogusconfig"' in row),
cls._lines_iter_from_jsonl_stream_or_string(stream_or_string),
),
):
object_type: str = deserialized_object.object.__class__.__name__

if object_type == "CatalogusConfig":
target_base_url = cls._get_url_root(
deserialized_object.object.service.api_root
)
source_base_url = cls._get_url_root(deserialized_object.object.url)
base_url_mapping[source_base_url] = target_base_url
else:
# https://www.xkcd.com/2200/
logger.error(
"Tried to filter for catalogus config objects, but also got: %s",
object_type,
)

for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
source_url_found = False
for source, target in base_url_mapping.items():
line = line.replace(source, target)
source_url_found = True

if not source_url_found:
raise ValueError("Unable to rewrite ZGW urls")

yield line

@classmethod
@transaction.atomic()
def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
model_to_counter_mapping = {
"CatalogusConfig": "catalogus_configs_imported",
"ZaakTypeConfig": "zaaktype_configs_imported",
"ZaakTypeInformatieObjectTypeConfig": "zaak_inormatie_object_type_configs_imported",
"ZaakTypeInformatieObjectTypeConfig": "zaak_informatie_object_type_configs_imported",
"ZaakTypeStatusTypeConfig": "zaak_status_type_configs_imported",
"ZaakTypeResultaatTypeConfig": "zaak_resultaat_type_configs_imported",
}

object_type_counts = defaultdict(int)

for deserialized_object in serializers.deserialize(
"jsonl", cls._rewrite_jsonl_url_references(stream_or_string)
):
deserialized_object.save()
object_type = deserialized_object.object.__class__.__name__
object_type_counts[object_type] += 1
rows_successfully_processed = 0
import_errors = []
for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
try:
(deserialized_object,) = serializers.deserialize(
"jsonl",
line,
use_natural_primary_keys=True,
use_natural_foreign_keys=True,
)
except DeserializationError as exc:
error = ZGWImportError.from_exception_and_jsonl(exc, line)
logger.error(error)
import_errors.append(error)
else:
source_config = deserialized_object.object
try:
match source_config:
case CatalogusConfig():
check_catalogus_config_exists(
source_config=source_config, jsonl=line
)
case ZaakTypeConfig():
_update_zaaktype_config(
source_config=source_config, jsonl=line
)
case ZaakTypeInformatieObjectTypeConfig():
exclude_fields = [
"id",
"zaaktype_config",
"zaaktype_uuids",
"informatieobjecttype_url",
]
_update_nested_zgw_config(
source_config, exclude_fields, line
)
case ZaakTypeStatusTypeConfig():
exclude_fields = [
"id",
"zaaktype_config",
"zaaktype_uuids",
"statustype_url",
]
_update_nested_zgw_config(
source_config, exclude_fields, line
)
case ZaakTypeResultaatTypeConfig():
exclude_fields = [
"id",
"zaaktype_config",
"zaaktype_uuids",
"resultaattype_url",
]
_update_nested_zgw_config(
source_config, exclude_fields, line
)
except ZGWImportError as exc:
logger.error(exc)
import_errors.append(exc)
else:
object_type = source_config.__class__.__name__
object_type_counts[object_type] += 1
rows_successfully_processed += 1

creation_kwargs = {
"total_rows_processed": sum(object_type_counts.values()),
"total_rows_processed": rows_successfully_processed + len(import_errors),
"import_errors": import_errors,
}

for model_name, counter_field in model_to_counter_mapping.items():
Expand Down
Loading

0 comments on commit e589b3a

Please sign in to comment.