Skip to content

Commit

Permalink
pinecone update (#159)
Browse files Browse the repository at this point in the history
* pinecone update

* new black
  • Loading branch information
hbertrand authored Feb 7, 2024
1 parent fbb7179 commit 07b6bb8
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 14 deletions.
13 changes: 5 additions & 8 deletions buster/documents_manager/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ class DocumentsService(DocumentsManager):
def __init__(
self,
pinecone_api_key: str,
pinecone_env: str,
pinecone_index: str,
pinecone_namespace: str,
mongo_uri: str,
Expand All @@ -37,9 +36,9 @@ def __init__(
"""
super().__init__(**kwargs)

pinecone.init(api_key=pinecone_api_key, environment=pinecone_env)
pc = pinecone.Pinecone(api_key=pinecone_api_key)

self.index = pinecone.Index(pinecone_index)
self.index = pc.Index(pinecone_index)
self.namespace = pinecone_namespace

self.mongo_db_name = mongo_db_name
Expand Down Expand Up @@ -98,11 +97,9 @@ def _add_documents(self, df: pd.DataFrame):

to_upsert.append(vector)

# Current (November 2023) Pinecone upload rules:
# - Max 1000 vectors per batch
# - Max 2 MB per batch
# Sparse vectors are heavier, so we reduce the batch size when using them.
MAX_PINECONE_BATCH_SIZE = 100 if use_sparse_vector else 1000
# Current (February 2024) Pinecone upload rules:
# - Max 100 vectors per batch
MAX_PINECONE_BATCH_SIZE = 100
for i in range(0, len(to_upsert), MAX_PINECONE_BATCH_SIZE):
self.index.upsert(vectors=to_upsert[i : i + MAX_PINECONE_BATCH_SIZE], namespace=self.namespace)

Expand Down
3 changes: 1 addition & 2 deletions buster/parsers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,7 @@ def relative_path(self) -> str:
return self._relative_path

@abstractmethod
def find_sections(self) -> Iterator[Section]:
...
def find_sections(self) -> Iterator[Section]: ...

def parse(self) -> list[Section]:
"""Parse the documents into sections, respecting the length constraints."""
Expand Down
5 changes: 2 additions & 3 deletions buster/retriever/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ class ServiceRetriever(Retriever):
def __init__(
self,
pinecone_api_key: str,
pinecone_env: str,
pinecone_index: str,
pinecone_namespace: str,
mongo_uri: str,
Expand All @@ -43,9 +42,9 @@ def __init__(
"""
super().__init__(**kwargs)

pinecone.init(api_key=pinecone_api_key, environment=pinecone_env)
pc = pinecone.Pinecone(api_key=pinecone_api_key)

self.index = pinecone.Index(pinecone_index)
self.index = pc.Index(pinecone_index)
self.namespace = pinecone_namespace

self.client = MongoClient(mongo_uri, server_api=ServerApi("1"))
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ matplotlib
numpy>=1.25
openai>=1.0
pandas>=2.1.3
pinecone-client
pinecone-client>=3.0.2
pinecone-text>=0.6.0
pymongo
pytest
Expand Down

0 comments on commit 07b6bb8

Please sign in to comment.