-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Standalone support for gc and shard splitting (#1222)
* support gc and shard splitting with standalone
* fix
* fix
* also on delete
* refactor kb orm
* test fixes
* test fixes
* only 1 exception with that name
* exc for datamanagers/
* type hint
- Loading branch information
Showing
19 changed files
with
271 additions
and
118 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Copyright (C) 2021 Bosutech XXI S.L. | ||
# | ||
# nucliadb is offered under the AGPL v3.0 and as commercial software. | ||
# For commercial licensing, contact us at [email protected]. | ||
# | ||
# AGPL: | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as | ||
# published by the Free Software Foundation, either version 3 of the | ||
# License, or (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
# | ||
class KnowledgeBoxNotFound(Exception):
    """Error signalling that a requested Knowledge Box could not be found."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Copyright (C) 2021 Bosutech XXI S.L. | ||
# | ||
# nucliadb is offered under the AGPL v3.0 and as commercial software. | ||
# For commercial licensing, contact us at [email protected]. | ||
# | ||
# AGPL: | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as | ||
# published by the Free Software Foundation, either version 3 of the | ||
# License, or (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
# | ||
from nucliadb.common.cluster.exceptions import ShardsNotFound | ||
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound | ||
from nucliadb.common.maindb.driver import Driver | ||
from nucliadb_protos import knowledgebox_pb2, writer_pb2 | ||
from nucliadb_utils.keys import KB_SHARDS | ||
|
||
|
||
class KnowledgeBoxDataManager:
    """Read Knowledge Box level data through a maindb driver."""

    def __init__(self, driver: Driver):
        """Keep a reference to the driver used to open transactions."""
        self.driver = driver

    async def get_shards_object(self, kbid: str) -> writer_pb2.Shards:
        """Fetch and parse the ``Shards`` protobuf stored for a Knowledge Box.

        Args:
            kbid: Identifier used to build the ``KB_SHARDS`` key.

        Returns:
            The ``writer_pb2.Shards`` message parsed from the stored payload.

        Raises:
            ShardsNotFound: If nothing (or an empty payload) is stored under
                the key.
        """
        key = KB_SHARDS.format(kbid=kbid)
        async with self.driver.transaction() as txn:
            payload = await txn.get(key)
            if not payload:
                raise ShardsNotFound(kbid)
            pb = writer_pb2.Shards()
            pb.ParseFromString(payload)
            return pb

    async def get_model_metadata(
        self, kbid: str
    ) -> knowledgebox_pb2.SemanticModelMetadata:
        """Return the semantic model metadata of a Knowledge Box.

        Args:
            kbid: Identifier of the Knowledge Box to look up.

        Returns:
            The ``model`` field of the stored ``Shards`` object when it is
            set; otherwise a ``SemanticModelMetadata`` built from the
            ``similarity`` field of the ``Shards`` object.

        Raises:
            KnowledgeBoxNotFound: If no ``Shards`` object exists for ``kbid``.
        """
        try:
            shards_obj = await self.get_shards_object(kbid)
        except ShardsNotFound as exc:
            # Translate to the datamanagers-level exception while keeping the
            # original error attached as the explicit cause.
            raise KnowledgeBoxNotFound(kbid) from exc

        if shards_obj.HasField("model"):
            return shards_obj.model

        # Backward-compatibility code for old KBs that do not have the `model`
        # attribute set in the Shards object.
        # Clean up this code after a migration is done unifying all fields
        # under `model` (on-prem and cloud).
        return knowledgebox_pb2.SemanticModelMetadata(
            similarity_function=shards_obj.similarity
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
a4fc620
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Benchmark
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error
4465.288993125828
iter/sec (stddev: 0.000003904942276966958
)5804.479338298567
iter/sec (stddev: 1.4505330313876097e-7
)1.30
This comment was automatically generated by workflow using github-action-benchmark.
a4fc620
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Benchmark
nucliadb/tests/benchmarks/test_search.py::test_search_returns_labels
79.31886786101556
iter/sec (stddev: 0.00020922160019282615
)60.779932309336715
iter/sec (stddev: 0.0019119907918232523
)0.77
nucliadb/tests/benchmarks/test_search.py::test_search_relations
183.0915662584169
iter/sec (stddev: 0.00019032119488819046
)182.57436721258293
iter/sec (stddev: 0.0002220745559283828
)1.00
This comment was automatically generated by workflow using github-action-benchmark.