Skip to content

Commit

Permalink
add num_docs (#62)
Browse files Browse the repository at this point in the history
* add num_docs

add num_docs

* Update test_inmemory_vectordb.py

* Update test_hnswlib_vectordb.py
  • Loading branch information
0x376h authored Oct 8, 2023
1 parent a8531f6 commit 059dc48
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 3 deletions.
9 changes: 8 additions & 1 deletion tests/unit/test_hnswlib_vectordb.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,11 @@ def test_hnswlib_vectordb_restore(docs_to_index, tmpdir):
assert len(res.matches) == 10
# assert res.id == res.matches[0].id
# assert res.text == res.matches[0].text
# assert res.scores[0] < 0.001 # some precision issues, should be 0
# assert res.scores[0] < 0.001 # some precision issues, should be 0

def test_hnswlib_num_dos(tmpdir):
    """Index a batch of random docs into an HNSW db and check ``num_docs``.

    The database is created in the pytest-provided temporary directory, so
    nothing is left behind between runs.
    """
    expected_count = 1000
    database = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
    documents = DocList[MyDoc](
        [
            MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128))
            for i in range(expected_count)
        ]
    )
    database.index(inputs=documents)
    # ``num_docs`` answers with a dict, not a bare integer.
    stats = database.num_docs()
    assert stats['num_docs'] == expected_count
7 changes: 7 additions & 0 deletions tests/unit/test_inmemory_vectordb.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,10 @@ def test_inmemory_vectordb_restore(docs_to_index, tmpdir):
assert res.id == res.matches[0].id
assert res.text == res.matches[0].text
assert res.scores[0] > 0.99 # some precision issues, should be 1

def test_inmemory_num_dos(tmpdir):
    """Index a batch of random docs into the in-memory db and check ``num_docs``.

    The database workspace is the pytest-provided temporary directory.
    """
    expected_count = 1000
    database = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
    documents = DocList[MyDoc](
        [
            MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128))
            for i in range(expected_count)
        ]
    )
    database.index(inputs=documents)
    # ``num_docs`` answers with a dict, not a bare integer.
    stats = database.num_docs()
    assert stats['num_docs'] == expected_count
3 changes: 3 additions & 0 deletions vectordb/db/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ async def _deploy():
ret = asyncio.run(_deploy())
return ret

def num_docs(self, **kwargs):
    """Ask the underlying executor for its document count.

    Any keyword arguments are accepted but not forwarded; the executor's
    ``num_docs()`` result is returned unchanged.
    """
    executor_report = self._executor.num_docs()
    return executor_report

@pass_kwargs_as_params
@unify_input_output
def index(self, docs: 'DocList[TSchema]', parameters: Optional[Dict] = None, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion vectordb/db/executors/hnsw_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ async def async_update(self, docs, *args, **kwargs):
return self.update(docs, *args, **kwargs)

def num_docs(self, **kwargs):
    """Return the indexer's document count wrapped in a dict.

    Keyword arguments are accepted for call-site compatibility and ignored.

    Returns:
        ``{'num_docs': <count>}``, where the count is whatever
        ``self._indexer.num_docs()`` reports.
    """
    # The count is read from ``_indexer``. The stale
    # ``self._index.num_docs()`` return that preceded this line is dropped:
    # it ran first and left the corrected lookup unreachable.
    return {'num_docs': self._indexer.num_docs()}

def snapshot(self, snapshot_dir):
# TODO: Maybe copy the work_dir to workspace if `handle` is False
Expand Down
2 changes: 1 addition & 1 deletion vectordb/db/executors/inmemory_exact_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def update(self, docs, *args, **kwargs):
return self._index(docs)

def num_docs(self, *args, **kwargs):
    """Return the indexer's document count wrapped in a dict.

    Positional and keyword arguments are accepted for call-site
    compatibility and ignored.

    Returns:
        ``{'num_docs': <count>}``, where the count is whatever
        ``self._indexer.num_docs()`` reports.
    """
    # ``self._index`` is invoked as a callable elsewhere in this executor,
    # so the count must come from ``_indexer``. The stale
    # ``self._index.num_docs()`` return is dropped: it ran first and left
    # the corrected lookup unreachable.
    return {'num_docs': self._indexer.num_docs()}

def snapshot(self, snapshot_dir):
snapshot_file = f'{snapshot_dir}/index.bin'
Expand Down

0 comments on commit 059dc48

Please sign in to comment.