From f403694b008b3c1e2e0428d23bde8443d4732461 Mon Sep 17 00:00:00 2001 From: Tarek Ziade Date: Wed, 27 Sep 2023 09:20:16 +0200 Subject: [PATCH] Find endpoint returns wrong resource ids (#1376) * added a test * reproduce the issue (serialize returns None) * verify we issue a warning --- nucliadb/nucliadb/search/search/find_merge.py | 4 ++ .../nucliadb/tests/integration/test_find.py | 63 +++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/nucliadb/nucliadb/search/search/find_merge.py b/nucliadb/nucliadb/search/search/find_merge.py index e46927b0c9..aa29bcfabf 100644 --- a/nucliadb/nucliadb/search/search/find_merge.py +++ b/nucliadb/nucliadb/search/search/find_merge.py @@ -106,6 +106,10 @@ async def set_resource_metadata_value( ) if serialized_resource is not None: find_resources[resource].updated_from(serialized_resource) + else: + logger.warning(f"Resource {resource} not found in {kbid}") + find_resources.pop(resource, None) + finally: max_operations.release() diff --git a/nucliadb/nucliadb/tests/integration/test_find.py b/nucliadb/nucliadb/tests/integration/test_find.py index 07ff00960d..352d2f0c7f 100644 --- a/nucliadb/nucliadb/tests/integration/test_find.py +++ b/nucliadb/nucliadb/tests/integration/test_find.py @@ -18,6 +18,7 @@ # along with this program. If not, see . 
import asyncio +from unittest.mock import patch import pytest from httpx import AsyncClient @@ -190,3 +191,65 @@ async def test_find_min_score( ) assert resp.status_code == 200 assert resp.json()["min_score"] == 0.5 + + +@pytest.mark.asyncio +async def test_story_7286( + nucliadb_reader: AsyncClient, + nucliadb_writer: AsyncClient, + nucliadb_grpc: WriterStub, + knowledgebox, + caplog, +): + resp = await nucliadb_writer.post( + f"/kb/{knowledgebox}/resources", + json={ + "slug": "myresource", + "title": "My Title", + "summary": "My summary", + "icon": "text/plain", + }, + ) + assert resp.status_code == 201 + rid = resp.json()["uuid"] + + resp = await nucliadb_writer.patch( + f"/kb/{knowledgebox}/resource/{rid}", + json={ + "fieldmetadata": [ + { + "field": { + "field": "text1", + "field_type": "text", + }, + "paragraphs": [ + { + "key": f"{rid}/t/text1/0-7", + "classifications": [{"labelset": "ls1", "label": "label"}], + } + ], + } + ] + }, + ) + assert resp.status_code == 200 + + with patch("nucliadb.search.search.find_merge.serialize", return_value=None): + # should get no result (because serialize returns None, as the resource is not found in the DB) + resp = await nucliadb_reader.post( + f"/kb/{knowledgebox}/find", + json={ + "query": "title", + "features": ["paragraph", "vector", "relations"], + "shards": [], + "highlight": True, + "autofilter": False, + "page_number": 0, + "show": ["basic", "values", "origin"], + "filters": [], + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert len(body["resources"]) == 0 + assert caplog.record_tuples[0][2] == f"Resource {rid} not found in {knowledgebox}"