Skip to content

Commit

Permalink
faster parallel embed
Browse files Browse the repository at this point in the history
  • Loading branch information
devxpy committed Nov 18, 2023
1 parent 58a1cc9 commit 493ceb9
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions daras_ai_v2/vector_search.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -400,14 +400,13 @@ def get_embeds_for_doc(
  )
  ]
  # get doc embeds in batches
- embeds = []
  batch_size = 16 # azure openai limits
  texts = [m["title"] + " | " + m["snippet"] for m in metas]
- for i in range(0, len(texts), batch_size):
- # progress = int(i / len(texts) * 100)
- # print(f"Getting document embeddings ({progress}%)...")
- batch = texts[i : i + batch_size]
- embeds.extend(openai_embedding_create(batch))
+ embeds = flatmap_parallel(
+ openai_embedding_create,
+ [texts[i : i + batch_size] for i in range(0, len(texts), batch_size)],
+ max_workers=5,
+ )
  return list(zip(metas, embeds))


Expand Down

0 comments on commit 493ceb9

Please sign in to comment.