Skip to content

Commit

Permalink
Find endpoint returns wrong resource ids (#1376)
Browse files Browse the repository at this point in the history
* added a test

* reproduce the issue (serialize returns None)

* verify we issue a warning
  • Loading branch information
tarekziade authored Sep 27, 2023
1 parent 1ecf27e commit f403694
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 0 deletions.
4 changes: 4 additions & 0 deletions nucliadb/nucliadb/search/search/find_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ async def set_resource_metadata_value(
)
if serialized_resource is not None:
find_resources[resource].updated_from(serialized_resource)
else:
logger.warning(f"Resource {resource} not found in {kbid}")
find_resources.pop(resource, None)

finally:
max_operations.release()

Expand Down
63 changes: 63 additions & 0 deletions nucliadb/nucliadb/tests/integration/test_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import asyncio
from unittest.mock import patch

import pytest
from httpx import AsyncClient
Expand Down Expand Up @@ -190,3 +191,65 @@ async def test_find_min_score(
)
assert resp.status_code == 200
assert resp.json()["min_score"] == 0.5


@pytest.mark.asyncio
async def test_story_7286(
    nucliadb_reader: AsyncClient,
    nucliadb_writer: AsyncClient,
    nucliadb_grpc: WriterStub,
    knowledgebox,
    caplog,
):
    """Check that /find omits resources whose serialization returns None.

    A resource is created and annotated, then ``serialize`` (as imported by
    ``nucliadb.search.search.find_merge``) is patched to return ``None``.
    The find response must then contain no resources, and a warning naming
    the resource id and the knowledge box must be logged.
    """
    resp = await nucliadb_writer.post(
        f"/kb/{knowledgebox}/resources",
        json={
            "slug": "myresource",
            "title": "My Title",
            "summary": "My summary",
            "icon": "text/plain",
        },
    )
    assert resp.status_code == 201
    rid = resp.json()["uuid"]

    # Attach a paragraph-level classification to field "text1" of the resource.
    resp = await nucliadb_writer.patch(
        f"/kb/{knowledgebox}/resource/{rid}",
        json={
            "fieldmetadata": [
                {
                    "field": {
                        "field": "text1",
                        "field_type": "text",
                    },
                    "paragraphs": [
                        {
                            "key": f"{rid}/t/text1/0-7",
                            "classifications": [
                                {"labelset": "ls1", "label": "label"}
                            ],
                        }
                    ],
                }
            ]
        },
    )
    assert resp.status_code == 200

    with patch("nucliadb.search.search.find_merge.serialize", return_value=None):
        # should get no result (because serialize returns None, as the
        # resource is not found in the DB)
        resp = await nucliadb_reader.post(
            f"/kb/{knowledgebox}/find",
            json={
                "query": "title",
                "features": ["paragraph", "vector", "relations"],
                "shards": [],
                "highlight": True,
                "autofilter": False,
                "page_number": 0,
                "show": ["basic", "values", "origin"],
                "filters": [],
            },
        )
        assert resp.status_code == 200
        body = resp.json()
        assert len(body["resources"]) == 0

        # Look for the warning anywhere in the captured records instead of
        # pinning it to index 0: unrelated log lines emitted earlier in the
        # test must not make this check fail (or raise IndexError).
        expected_warning = f"Resource {rid} not found in {knowledgebox}"
        logged_messages = [message for _, _, message in caplog.record_tuples]
        assert expected_warning in logged_messages

3 comments on commit f403694

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: f403694 Previous: 374ff84 Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 9961.463283719628 iter/sec (stddev: 3.905548854902772e-7) 8667.761941074237 iter/sec (stddev: 2.4327377097212595e-7) 0.87

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: f403694 Previous: 374ff84 Ratio
nucliadb/tests/benchmarks/test_search.py::test_search_returns_labels[tikv_driver_settings] 55.589994526227066 iter/sec (stddev: 0.00033342717884475136)
nucliadb/tests/benchmarks/test_search.py::test_search_relations[tikv_driver_settings] 153.94547058653507 iter/sec (stddev: 0.0001462787212458045)

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: f403694 Previous: 374ff84 Ratio
nucliadb/tests/benchmarks/test_search.py::test_search_returns_labels[pg_driver_settings] 40.564386315183135 iter/sec (stddev: 0.000550809285267684)
nucliadb/tests/benchmarks/test_search.py::test_search_relations[pg_driver_settings] 118.24289250280503 iter/sec (stddev: 0.00021220229878493168)

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.