Skip to content

Commit

Permalink
Small fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Dicklesworthstone committed May 18, 2024
1 parent 45a9837 commit ea9df57
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 6 deletions.
4 changes: 2 additions & 2 deletions service_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ async def compute_and_store_transcript_embeddings(audio_file_name, list_of_trans
await store_document_embeddings_in_db(fake_upload_file, file_hash, combined_transcript_text.encode(), json.dumps(computed_embeddings).encode(), computed_embeddings, llm_model_name, ip_address, datetime.utcnow())
return full_download_url

-async def compute_transcript_with_whisper_from_audio_func(audio_file_hash, audio_file_path, audio_file_name, audio_file_size_mb, ip_address, req: Request, compute_embeddings_for_resulting_transcript_document=True, llm_model_name=DEFAULT_MODEL_NAME, corpus_identifier_string: str):
+async def compute_transcript_with_whisper_from_audio_func(audio_file_hash, audio_file_path, audio_file_name, audio_file_size_mb, ip_address, req: Request, corpus_identifier_string: str, compute_embeddings_for_resulting_transcript_document=True, llm_model_name=DEFAULT_MODEL_NAME):
model_size = "large-v2"
logger.info(f"Loading Whisper model {model_size}...")
num_workers = 1 if psutil.virtual_memory().total < 32 * (1024 ** 3) else min(4, max(1, int((psutil.virtual_memory().total - 32 * (1024 ** 3)) / (4 * (1024 ** 3))))) # Only use more than 1 worker if there is at least 32GB of RAM; then use 1 worker per additional 4GB of RAM up to 4 workers max
Expand Down Expand Up @@ -191,7 +191,7 @@ async def get_or_compute_transcript(file: UploadFile,
if corpus_identifier_string is None:
corpus_identifier_string = audio_file_hash

-segment_details, info, combined_transcript_text, combined_transcript_text_list_of_metadata_dicts, request_time, response_time, total_time, download_url = await compute_transcript_with_whisper_from_audio_func(audio_file_hash, audio_file_name, file.filename, audio_file_size_mb, ip_address, req, compute_embeddings_for_resulting_transcript_document, llm_model_name, corpus_identifier_string)
+segment_details, info, combined_transcript_text, combined_transcript_text_list_of_metadata_dicts, request_time, response_time, total_time, download_url = await compute_transcript_with_whisper_from_audio_func(audio_file_hash, audio_file_name, file.filename, audio_file_size_mb, ip_address, req, corpus_identifier_string, compute_embeddings_for_resulting_transcript_document, llm_model_name)
audio_transcript_response = {
"audio_file_hash": audio_file_hash,
"audio_file_name": file.filename,
Expand Down
4 changes: 0 additions & 4 deletions swiss_army_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,12 +629,10 @@ async def search_stored_embeddings_with_query_string_for_semantic_similarity(req
if faiss_index is None:
raise HTTPException(status_code=400, detail=f"No FAISS index found for model: {llm_model_name}")
logger.info("Searching for the most similar string in the FAISS index")

if request.corpus_identifier_string:
associated_texts_by_model = await get_texts_for_corpus_identifier(request.corpus_identifier_string)
else:
associated_texts_by_model = await get_texts_for_model(llm_model_name)

similarities, indices = faiss_index.search(input_embedding.reshape(1, -1), num_results) # Search for num_results similar strings
results = [] # Create an empty list to store the results
for ii in range(num_results):
Expand Down Expand Up @@ -813,10 +811,8 @@ async def get_all_embedding_vectors_for_document(file: UploadFile = File(...),
hash_obj.update(chunk)
file_hash = hash_obj.hexdigest()
logger.info(f"SHA3-256 hash of submitted file: {file_hash}")

if corpus_identifier_string is None:
corpus_identifier_string = file_hash

unique_id = f"document_embedding_{file_hash}_{llm_model_name}"
lock = await shared_resources.lock_manager.lock(unique_id)
if lock.valid:
Expand Down

0 comments on commit ea9df57

Please sign in to comment.