Find endpoint returns wrong resource ids (#1376)

* added a test
* reproduce the issue (serialize returns None)
* verify we issue a warning
nuclia · Sep 27, 2023 · f403694 · f403694 · github-actions · Sep 27, 2023
1 parent 1ecf27e
commit f403694
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 0 deletions.
diff --git a/nucliadb/nucliadb/search/search/find_merge.py b/nucliadb/nucliadb/search/search/find_merge.py
@@ -106,6 +106,10 @@ async def set_resource_metadata_value(
         )
         if serialized_resource is not None:
             find_resources[resource].updated_from(serialized_resource)
+        else:
+            logger.warning(f"Resource {resource} not found in {kbid}")
+            find_resources.pop(resource, None)
+
     finally:
         max_operations.release()
 

diff --git a/nucliadb/nucliadb/tests/integration/test_find.py b/nucliadb/nucliadb/tests/integration/test_find.py
@@ -18,6 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 import asyncio
+from unittest.mock import patch
 
 import pytest
 from httpx import AsyncClient
@@ -190,3 +191,65 @@ async def test_find_min_score(
     )
     assert resp.status_code == 200
     assert resp.json()["min_score"] == 0.5
+
+
+@pytest.mark.asyncio
+async def test_story_7286(
+    nucliadb_reader: AsyncClient,
+    nucliadb_writer: AsyncClient,
+    nucliadb_grpc: WriterStub,
+    knowledgebox,
+    caplog,
+):
+    resp = await nucliadb_writer.post(
+        f"/kb/{knowledgebox}/resources",
+        json={
+            "slug": "myresource",
+            "title": "My Title",
+            "summary": "My summary",
+            "icon": "text/plain",
+        },
+    )
+    assert resp.status_code == 201
+    rid = resp.json()["uuid"]
+
+    resp = await nucliadb_writer.patch(
+        f"/kb/{knowledgebox}/resource/{rid}",
+        json={
+            "fieldmetadata": [
+                {
+                    "field": {
+                        "field": "text1",
+                        "field_type": "text",
+                    },
+                    "paragraphs": [
+                        {
+                            "key": f"{rid}/t/text1/0-7",
+                            "classifications": [{"labelset": "ls1", "label": "label"}],
+                        }
+                    ],
+                }
+            ]
+        },
+    )
+    assert resp.status_code == 200
+
+    with patch("nucliadb.search.search.find_merge.serialize", return_value=None):
+        # should get no result (because serialize returns None, as the resource is not found in the DB)
+        resp = await nucliadb_reader.post(
+            f"/kb/{knowledgebox}/find",
+            json={
+                "query": "title",
+                "features": ["paragraph", "vector", "relations"],
+                "shards": [],
+                "highlight": True,
+                "autofilter": False,
+                "page_number": 0,
+                "show": ["basic", "values", "origin"],
+                "filters": [],
+            },
+        )
+        assert resp.status_code == 200
+    body = resp.json()
+    assert len(body["resources"]) == 0
+    assert caplog.record_tuples[0][2] == f"Resource {rid} not found in {knowledgebox}"
Benchmark suite	Current: `f403694`	Previous: `374ff84`	Ratio
`nucliadb/tests/benchmarks/test_search.py::test_search_returns_labels[tikv_driver_settings]`	`55.589994526227066` iter/sec (`stddev: 0.00033342717884475136`)
`nucliadb/tests/benchmarks/test_search.py::test_search_relations[tikv_driver_settings]`	`153.94547058653507` iter/sec (`stddev: 0.0001462787212458045`)
Benchmark suite	Current: `f403694`	Previous: `374ff84`	Ratio
`nucliadb/tests/benchmarks/test_search.py::test_search_returns_labels[pg_driver_settings]`	`40.564386315183135` iter/sec (`stddev: 0.000550809285267684`)
`nucliadb/tests/benchmarks/test_search.py::test_search_relations[pg_driver_settings]`	`118.24289250280503` iter/sec (`stddev: 0.00021220229878493168`)