From f403694b008b3c1e2e0428d23bde8443d4732461 Mon Sep 17 00:00:00 2001
From: Tarek Ziade <tarek@ziade.org>
Date: Wed, 27 Sep 2023 09:20:16 +0200
Subject: [PATCH] Find endpoint returns wrong resource ids (#1376)

* added a test

* reproduce the issue (serialize returns None)

* verify we issue a warning
---
 nucliadb/nucliadb/search/search/find_merge.py |  4 ++
 .../nucliadb/tests/integration/test_find.py   | 63 +++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/nucliadb/nucliadb/search/search/find_merge.py b/nucliadb/nucliadb/search/search/find_merge.py
index e46927b0c9..aa29bcfabf 100644
--- a/nucliadb/nucliadb/search/search/find_merge.py
+++ b/nucliadb/nucliadb/search/search/find_merge.py
@@ -106,6 +106,10 @@ async def set_resource_metadata_value(
         )
         if serialized_resource is not None:
             find_resources[resource].updated_from(serialized_resource)
+        else:
+            logger.warning(f"Resource {resource} not found in {kbid}")
+            find_resources.pop(resource, None)
+
     finally:
         max_operations.release()
 
diff --git a/nucliadb/nucliadb/tests/integration/test_find.py b/nucliadb/nucliadb/tests/integration/test_find.py
index 07ff00960d..352d2f0c7f 100644
--- a/nucliadb/nucliadb/tests/integration/test_find.py
+++ b/nucliadb/nucliadb/tests/integration/test_find.py
@@ -18,6 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 import asyncio
+from unittest.mock import patch
 
 import pytest
 from httpx import AsyncClient
@@ -190,3 +191,65 @@ async def test_find_min_score(
     )
     assert resp.status_code == 200
     assert resp.json()["min_score"] == 0.5
+
+
+@pytest.mark.asyncio
+async def test_story_7286(
+    nucliadb_reader: AsyncClient,
+    nucliadb_writer: AsyncClient,
+    nucliadb_grpc: WriterStub,
+    knowledgebox,
+    caplog,
+):
+    """Find drops (and warns about) resources that cannot be serialized."""
+    resp = await nucliadb_writer.post(
+        f"/kb/{knowledgebox}/resources",
+        json={
+            "slug": "myresource",
+            "title": "My Title",
+            "summary": "My summary",
+            "icon": "text/plain",
+        },
+    )
+    assert resp.status_code == 201
+    rid = resp.json()["uuid"]
+
+    resp = await nucliadb_writer.patch(
+        f"/kb/{knowledgebox}/resource/{rid}",
+        json={
+            "fieldmetadata": [
+                {
+                    "field": {"field": "text1", "field_type": "text"},
+                    "paragraphs": [
+                        {
+                            "key": f"{rid}/t/text1/0-7",
+                            "classifications": [{"labelset": "ls1", "label": "label"}],
+                        }
+                    ],
+                }
+            ]
+        },
+    )
+    assert resp.status_code == 200
+
+    with patch("nucliadb.search.search.find_merge.serialize", return_value=None):
+        # serialize is patched to return None (resource not found in the DB): expect no results
+        resp = await nucliadb_reader.post(
+            f"/kb/{knowledgebox}/find",
+            json={
+                "query": "title",
+                "features": ["paragraph", "vector", "relations"],
+                "shards": [],
+                "highlight": True,
+                "autofilter": False,
+                "page_number": 0,
+                "show": ["basic", "values", "origin"],
+                "filters": [],
+            },
+        )
+        assert resp.status_code == 200
+    body = resp.json()
+    assert len(body["resources"]) == 0
+    # Other loggers may emit records first, so look for the warning in any record.
+    expected = f"Resource {rid} not found in {knowledgebox}"
+    assert any(record[2] == expected for record in caplog.record_tuples)