Skip to content

Commit

Permalink
[WIP]various fixes and feats:
Browse files Browse the repository at this point in the history
- fix user synchro mtd triggered by user actions
- feat improve debug logging for mtd sync process
- fix creation of user "Contact Principal for orphan metadata"
- fix dataset_name truncating and limiting to 255 char
  • Loading branch information
VincentCauchois committed Nov 20, 2024
1 parent 93b6eb9 commit e9a739f
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 13 deletions.
67 changes: 56 additions & 11 deletions backend/geonature/core/gn_meta/mtd/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import time
from urllib.parse import urljoin

from lxml import etree
import requests

Expand All @@ -28,15 +27,19 @@

# create logger
logger = logging.getLogger("MTD_SYNC")
level_log_mtd_sync = config["MTD"]["SYNC_LOG_LEVEL"]
# config logger
logger.setLevel(config["MTD"]["SYNC_LOG_LEVEL"])
logger.setLevel(level_log_mtd_sync)
handler = logging.StreamHandler()
formatter = logging.Formatter("%(asctime)s | %(levelname)s : %(message)s", "%Y-%m-%d %H:%M:%S")
handler.setFormatter(formatter)
logger.addHandler(handler)
# avoid logging output dupplication
logger.propagate = False

if level_log_mtd_sync == "DEBUG":
from sqlalchemy import exists
from geonature.core.gn_meta.models.datasets import TDatasets

class MTDInstanceApi:
af_path = "/mtd/cadre/export/xml/GetRecordsByInstanceId?id={ID_INSTANCE}"
Expand Down Expand Up @@ -194,30 +197,49 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
list_cd_nomenclature = db.session.scalars(
select(TNomenclatures.cd_nomenclature).distinct()
).all()
# TODO: remove following line
user_add_total_time = 0
logger.debug("MTD - PROCESS AF LIST")
if level_log_mtd_sync == "DEBUG":
nb_af = len(af_list)
nb_ds = len(ds_list)
logger.debug(f"Number of AF to process : {nb_af}")
logger.debug(f"Number of DS to process : {nb_ds}")
nb_updated_af = 0
nb_updated_ds = 0
nb_retrieved_new_af = 0
nb_retrieved_new_ds = 0
for af in af_list:
actors = af.pop("actors")
with db.session.begin_nested():
start_add_user_time = time.time()
add_unexisting_digitizer(af["id_digitizer"] if not id_role else id_role)
# TODO: remove following line
user_add_total_time += time.time() - start_add_user_time
if level_log_mtd_sync == "DEBUG":
af_already_exists = db.session.scalar(
exists().where(TAcquisitionFramework.unique_acquisition_framework_id == af["unique_acquisition_framework_id"]).select()
)
af = sync_af(af)
# TODO: choose whether or not to commit retrieval of the AF before association of actors
# and possibly retrieve an AF without any actor associated to it
# Commit here to retrieve the AF even if the association of actors that follows is to fail
db.session.commit()
# If the AF has not been retrieved, associated actors cannot be retrieved either
# and thus we continue to the next AF
if not af:
continue
associate_actors(
actors,
CorAcquisitionFrameworkActor,
"id_acquisition_framework",
af.id_acquisition_framework,
af.unique_acquisition_framework_id,
)
if af is not None:
if level_log_mtd_sync == "DEBUG":
if af_already_exists:
nb_updated_af += 1
else:
nb_retrieved_new_af += 1
associate_actors(
actors,
CorAcquisitionFrameworkActor,
"id_acquisition_framework",
af.id_acquisition_framework,
af.unique_acquisition_framework_id,
)
# TODO: check the following TODO:
# TODO: remove actors removed from MTD
db.session.commit()
Expand All @@ -231,9 +253,25 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
add_unexisting_digitizer(ds["id_digitizer"])
else:
add_unexisting_digitizer(id_role)
# TODO: remove following line
user_add_total_time += time.time() - start_add_user_time
if level_log_mtd_sync == "DEBUG":
ds_already_exists = db.session.scalar(
exists()
.where(
TDatasets.unique_dataset_id == ds["unique_dataset_id"],
)
.select()
)
ds = sync_ds(ds, list_cd_nomenclature)
# TODO: choose whether or not to commit retrieval of the DS before association of actors
##db.session.commit()
if ds is not None:
if level_log_mtd_sync == "DEBUG":
if ds_already_exists:
nb_updated_ds += 1
else:
nb_retrieved_new_ds += 1
associate_actors(
actors,
CorDatasetActor,
Expand All @@ -242,9 +280,16 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
ds.unique_dataset_id,
)

# TODO: remove following line
user_add_total_time = round(user_add_total_time, 2)

db.session.commit()

if level_log_mtd_sync == "DEBUG":
nb_ds_not_retrieved_or_not_updated = nb_ds - nb_updated_ds - nb_retrieved_new_ds
logger.debug(f"{nb_ds} DS processed : {nb_updated_ds} DS updated (including no change made) + {nb_retrieved_new_ds} new DS retrieved + {nb_ds_not_retrieved_or_not_updated} DS not retrieved or not updated")
nb_af_not_retrieved_or_not_updated = nb_af - nb_updated_af - nb_retrieved_new_af
logger.debug(f"{nb_af} AF processed : {nb_updated_af} AF updated (including no change made) + {nb_retrieved_new_af} new AF retrieved + {nb_af_not_retrieved_or_not_updated} AF not retrieved or not updated")

def sync_af_and_ds():
"""
Expand Down
2 changes: 1 addition & 1 deletion backend/geonature/core/gn_meta/mtd/mtd_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def associate_actors(
else:
dict_data_generated_user = {
"id_role": id_user_contact_principal_for_orphan_metadata,
"desc_role": user_contact_principal_for_orphan_metadata,
"desc_role": desc_role_for_user_contact_principal_for_orphan_metadata,
}
dict_data_generated_user = insert_or_update_role(
data=dict_data_generated_user
Expand Down
2 changes: 1 addition & 1 deletion backend/geonature/core/gn_meta/mtd/xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def format_acquisition_framework_id_from_xml(provided_af_uuid) -> Union[str, Non
current_jdd = {
"unique_dataset_id": jdd_uuid,
"uuid_acquisition_framework": ca_uuid,
"dataset_name": dataset_name if len(dataset_name) < 256 else f"{dataset_name[:253]}...",
"dataset_name": dataset_name if len(dataset_name) < 256 else f"{dataset_name[:252]}...",
"dataset_shortname": dataset_shortname,
"dataset_desc": (
dataset_desc
Expand Down
2 changes: 2 additions & 0 deletions backend/geonature/core/gn_meta/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def synchronize_mtd():
list_id_af = params.get("id_acquisition_frameworks", [])
for id_af in list_id_af:
sync_af_and_ds_by_user(id_role=current_user.id_role, id_af=id_af)
if not list_id_af:
sync_af_and_ds_by_user(id_role=current_user.id_role)
except Exception as e:
log.exception(f"Error while get JDD via MTD: {e}")

Expand Down

0 comments on commit e9a739f

Please sign in to comment.