Skip to content

Commit

Permalink
Initiaal push
Browse files Browse the repository at this point in the history
  • Loading branch information
GoldenWind8 committed Nov 23, 2023
1 parent 2906576 commit dec98fe
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 3 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ classifiers = [

[tool.poetry.dependencies]
python = "^3.8.1"
sentence_transformers = "*"
transformers = "*"
qdrant_client = "*"
openai = "0.28.0"
langchain = "*"
asyncio = "*"
Expand Down
59 changes: 56 additions & 3 deletions swarms/memory/qdrant.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,59 @@
"""
QDRANT MEMORY CLASS
from httpx import RequestError
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer
class Qdrant:
def __init__(self,api_key, host, port=6333, collection_name="qdrant", model_name="BAAI/bge-small-en-v1.5", https=True ):
self.client = QdrantClient(url=host, port=port, api_key=api_key) #, port=port, api_key=api_key, https=False
self.collection_name = collection_name
self._load_embedding_model(model_name)
self._setup_collection()

def _load_embedding_model(self, model_name):
# Load the embedding model
self.model = SentenceTransformer(model_name)

def _setup_collection(self):
# Check if the collection already exists
try:
exists = self.client.get_collection(self.collection_name)
return
except Exception:
# Collection does not exist, create it
self.client.create_collection(
collection_name=self.collection_name,
vectors_config=VectorParams(size=self.model.get_sentence_embedding_dimension(), distance=Distance.DOT),
)
print(f"Collection '{self.collection_name}' created.")
else:
print(f"Collection '{self.collection_name}' already exists.")

"""
def add_vectors(self, docs):
# Add vectors with payloads to the collection
points = []
for i, doc in enumerate(docs):
if doc.page_content:
embedding = self.model.encode(doc.page_content, normalize_embeddings=True)
points.append(PointStruct(id=i + 1, vector=embedding, payload={"content":doc.page_content}))
else:
print(f"Document at index {i} is missing 'text' or 'payload' key")

operation_info = self.client.upsert(
collection_name=self.collection_name,
wait=True,
points=points,
)
print(operation_info)
def search_vectors(self, query, limit=3):
query_vector= self.model.encode(query, normalize_embeddings=True)
# Search for similar vectors
search_result = self.client.search(
collection_name=self.collection_name,
query_vector=query_vector,
limit=limit
)
return search_result



#TODO, use kwargs in constructor, have search result be text
18 changes: 18 additions & 0 deletions swarms/memory/qdrant/usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from langchain.document_loaders import CSVLoader
from swarms.memory import qdrant

loader = CSVLoader(file_path="../document_parsing/aipg/aipg.csv", encoding='utf-8-sig')
docs = loader.load()


# Initialize the Qdrant instance
# See qdrant documentation on how to run locally
qdrant_client = qdrant.Qdrant(host ="https://697ea26c-2881-4e17-8af4-817fcb5862e8.europe-west3-0.gcp.cloud.qdrant.io", collection_name="qdrant", api_key="BhG2_yINqNU-aKovSEBadn69Zszhbo5uaqdJ6G_qDkdySjAljvuPqQ")
qdrant_client.add_vectors(docs)

# Perform a search
search_query = "Who is jojo"
search_results = qdrant_client.search_vectors(search_query)
print("Search Results:")
for result in search_results:
print(result)

0 comments on commit dec98fe

Please sign in to comment.