From ced3e15712b625c9736edf9b8e50aba9ae02bf03 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Wed, 17 Jul 2024 16:42:36 -0500 Subject: [PATCH] Use the update flag when updating fields of existing Elasticsearch entry (#1136) * make sure when updating fields of existing es entry use the update flag * force cast it to be dataset out * need to update metadata first before indexing it in es --- backend/app/routers/authorization.py | 39 ++++++++++++++++-------- backend/app/routers/files.py | 7 +++-- backend/app/routers/groups.py | 6 ++-- backend/app/routers/metadata_datasets.py | 9 +++--- backend/app/routers/metadata_files.py | 2 +- 5 files changed, 41 insertions(+), 22 deletions(-) diff --git a/backend/app/routers/authorization.py b/backend/app/routers/authorization.py index 5050db595..676a5c769 100644 --- a/backend/app/routers/authorization.py +++ b/backend/app/routers/authorization.py @@ -21,7 +21,6 @@ GroupAndRole, UserAndRole, ) -from app.models.files import FileDB, FileOut from app.models.groups import GroupDB from app.models.users import UserDB from app.routers.authentication import get_admin, get_admin_mode @@ -207,9 +206,9 @@ async def set_dataset_group_role( auth_db.user_ids.append(u.user.email) await auth_db.replace() await index_dataset( - es, DatasetOut(**dataset.dict()), auth_db.user_ids + es, DatasetOut(**dataset.dict()), auth_db.user_ids, update=True ) - await index_dataset_files(es, str(dataset_id)) + await index_dataset_files(es, str(dataset_id), update=True) if len(readonly_user_ids) > 0: readonly_auth_db = AuthorizationDB( creator=user_id, @@ -220,7 +219,10 @@ async def set_dataset_group_role( ) await readonly_auth_db.insert() await index_dataset( - es, DatasetOut(**dataset.dict()), readonly_auth_db.user_ids + es, + DatasetOut(**dataset.dict()), + readonly_auth_db.user_ids, + update=True, ) await index_dataset_files(es, str(dataset_id), update=True) return auth_db.dict() @@ -244,9 +246,12 @@ async def set_dataset_group_role( ) await readonly_auth_db.insert() await index_dataset( - es, DatasetOut(**dataset.dict()), readonly_auth_db.user_ids + es, + DatasetOut(**dataset.dict()), + readonly_auth_db.user_ids, + update=True, ) - await index_dataset_files(es, str(dataset_id)) + await index_dataset_files(es, str(dataset_id), update=True) if len(user_ids) > 0: auth_db = AuthorizationDB( creator=user_id, @@ -258,9 +263,9 @@ async def set_dataset_group_role( # if there are read only users add them with the role of viewer await auth_db.insert() await index_dataset( - es, DatasetOut(**dataset.dict()), auth_db.user_ids + es, DatasetOut(**dataset.dict()), auth_db.user_ids, update=True ) - await index_dataset_files(es, str(dataset_id)) + await index_dataset_files(es, str(dataset_id), update=True) return auth_db.dict() else: raise HTTPException(status_code=404, detail=f"Group {group_id} not found") @@ -314,7 +319,9 @@ async def set_dataset_user_role( else: auth_db.user_ids.append(username) await auth_db.save() - await index_dataset(es, DatasetOut(**dataset.dict()), auth_db.user_ids) + await index_dataset( + es, DatasetOut(**dataset.dict()), auth_db.user_ids, update=True + ) await index_dataset_files(es, dataset_id, update=True) return auth_db.dict() else: @@ -326,8 +333,10 @@ async def set_dataset_user_role( user_ids=[username], ) await auth_db.insert() - await index_dataset(es, DatasetOut(**dataset.dict()), [username]) - await index_dataset_files(es, dataset_id) + await index_dataset( + es, DatasetOut(**dataset.dict()), [username], update=True + ) + await index_dataset_files(es, dataset_id, update=True) return auth_db.dict() else: raise HTTPException(status_code=404, detail=f"User {username} not found") @@ -364,7 +373,9 @@ async def remove_dataset_group_role( auth_db.user_ids.remove(u.user.email) await auth_db.save() # Update elasticsearch index with new users - await index_dataset(es, DatasetOut(**dataset.dict()), auth_db.user_ids) + await index_dataset( + es, DatasetOut(**dataset.dict()), auth_db.user_ids, update=True + ) await index_dataset_files(es, str(dataset_id), update=True) return auth_db.dict() else: @@ -401,7 +412,9 @@ async def remove_dataset_user_role( auth_db.user_ids.remove(username) await auth_db.save() # Update elasticsearch index with updated users - await index_dataset(es, DatasetOut(**dataset.dict()), auth_db.user_ids) + await index_dataset( + es, DatasetOut(**dataset.dict()), auth_db.user_ids, update=True + ) await index_dataset_files(es, dataset_id, update=True) return auth_db.dict() else: diff --git a/backend/app/routers/files.py b/backend/app/routers/files.py index 003470dfc..0d078e48f 100644 --- a/backend/app/routers/files.py +++ b/backend/app/routers/files.py @@ -239,9 +239,12 @@ async def update_file( await new_version.insert() # Update entry to the file index - await index_file(es, FileOut(**updated_file.dict())) + await index_file(es, FileOut(**updated_file.dict()), update=True) await _resubmit_file_extractors( - updated_file, rabbitmq_client, user, credentials + FileOut(**updated_file.dict()), + rabbitmq_client=rabbitmq_client, + user=user, + credentials=credentials, ) # updating metadata in elasticsearch diff --git a/backend/app/routers/groups.py b/backend/app/routers/groups.py index 5ebbdb108..bc0bd5a49 100644 --- a/backend/app/routers/groups.py +++ b/backend/app/routers/groups.py @@ -267,7 +267,7 @@ async def add_member( ) ) is not None: await index_dataset( - es, DatasetOut(**dataset.dict()), auth.user_ids + es, DatasetOut(**dataset.dict()), auth.user_ids, update=True ) await index_dataset_files(es, str(auth.dataset_id), update=True) return group.dict() @@ -312,7 +312,9 @@ async def remove_member( if ( dataset := await DatasetDB.get(PydanticObjectId(auth.dataset_id)) ) is not None: - await index_dataset(es, DatasetOut(**dataset.dict()), auth.user_ids) + await index_dataset( + es, DatasetOut(**dataset.dict()), auth.user_ids, update=True + ) await index_dataset_files(es, str(auth.dataset_id), update=True) return group.dict() diff --git a/backend/app/routers/metadata_datasets.py b/backend/app/routers/metadata_datasets.py index a418592bd..3f8d2af21 100644 --- a/backend/app/routers/metadata_datasets.py +++ b/backend/app/routers/metadata_datasets.py @@ -111,7 +111,7 @@ async def add_dataset_metadata( await md.insert() # Add an entry to the metadata index - await index_dataset(es, dataset) + await index_dataset(es, DatasetOut(**dataset.dict()), update=True) return md.dict() @@ -163,7 +163,7 @@ async def replace_dataset_metadata( await md.replace() # Update entry to the metadata index - await index_dataset(es, dataset, update=True) + await index_dataset(es, DatasetOut(**dataset.dict()), update=True) return md.dict() else: raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found") @@ -229,8 +229,9 @@ async def update_dataset_metadata( md = await MetadataDB.find_one(*query) if md is not None: - await index_dataset(es, dataset, update=True) - return await patch_metadata(md, content, es) + patched_metadata = await patch_metadata(md, content, es) + await index_dataset(es, DatasetOut(**dataset.dict()), update=True) + return patched_metadata else: raise HTTPException( status_code=404, detail="Metadata matching the query not found" diff --git a/backend/app/routers/metadata_files.py b/backend/app/routers/metadata_files.py index 5cc4a83ae..9c7e68d66 100644 --- a/backend/app/routers/metadata_files.py +++ b/backend/app/routers/metadata_files.py @@ -148,7 +148,7 @@ async def add_file_metadata( await md.insert() # Add an entry to the metadata index - await index_file(es, FileOut(**file.dict())) + await index_file(es, FileOut(**file.dict()), update=True) return md.dict()