From 48002468ca9b0b44765ce082165a8a5c05f81b59 Mon Sep 17 00:00:00 2001
From: Komal Gilani
Date: Thu, 17 Oct 2024 13:23:29 +0200
Subject: [PATCH] updated code

---
 backend/pyproject.toml                        |  1 +
 backend/requirements.txt                      | 56 +++++++++----------
 backend/src/mapping_generation/llm_chain.py   |  8 +--
 backend/src/mapping_generation/manager_llm.py |  1 +
 backend/src/mapping_generation/utils.py       |  2 +
 5 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 097653a..47be16c 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -60,6 +60,7 @@ dependencies = [
     "faiss-cpu",
     "langchain-together",
     "simstring-fast",
+    "langchain_ollama",
     "langchain_huggingface",
     "langchain_groq",
     "fastembed",
diff --git a/backend/requirements.txt b/backend/requirements.txt
index b8aa79d..11a9b2f 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -224,37 +224,37 @@ numpy==1.26.4
     # rank-bm25
     # scikit-learn
     # scipy
-    # transformers
-nvidia-cublas-cu12==12.1.3.1
+# # transformers
+# nvidia-cublas-cu12==12.1.3.1
     # via
     #   nvidia-cudnn-cu12
     #   nvidia-cusolver-cu12
     #   torch
-nvidia-cuda-cupti-cu12==12.1.105
-    # via torch
-nvidia-cuda-nvrtc-cu12==12.1.105
-    # via torch
-nvidia-cuda-runtime-cu12==12.1.105
-    # via torch
-nvidia-cudnn-cu12==9.1.0.70
-    # via torch
-nvidia-cufft-cu12==11.0.2.54
-    # via torch
-nvidia-curand-cu12==10.3.2.106
-    # via torch
-nvidia-cusolver-cu12==11.4.5.107
-    # via torch
-nvidia-cusparse-cu12==12.1.0.106
-    # via
-    #   nvidia-cusolver-cu12
-    #   torch
-nvidia-nccl-cu12==2.20.5
-    # via torch
-nvidia-nvjitlink-cu12==12.6.77
-    # via
-    #   nvidia-cusolver-cu12
-    #   nvidia-cusparse-cu12
-nvidia-nvtx-cu12==12.1.105
+# nvidia-cuda-cupti-cu12==12.1.105
+# # via torch
+# nvidia-cuda-nvrtc-cu12==12.1.105
+# # via torch
+# nvidia-cuda-runtime-cu12==12.1.105
+# # via torch
+# nvidia-cudnn-cu12==9.1.0.70
+# # via torch
+# nvidia-cufft-cu12==11.0.2.54
+# # via torch
+# nvidia-curand-cu12==10.3.2.106
+# # via torch
+# nvidia-cusolver-cu12==11.4.5.107
+# # via torch
+# nvidia-cusparse-cu12==12.1.0.106
+# # via
+# #   nvidia-cusolver-cu12
+# #   torch
+# nvidia-nccl-cu12==2.20.5
+# # via torch
+# nvidia-nvjitlink-cu12==12.6.77
+# # via
+# #   nvidia-cusolver-cu12
+# #   nvidia-cusparse-cu12
+# nvidia-nvtx-cu12==12.1.105
     # via torch
 odfpy==1.4.1
     # via pandas
@@ -473,7 +473,7 @@ transformers==4.43.4
     # via
     #   adapters
     #   langchain-huggingface
     #   sentence-transformers
-triton==3.0.0
+# triton==3.0.0
     # via torch
 typing-extensions==4.9.0
     # via
diff --git a/backend/src/mapping_generation/llm_chain.py b/backend/src/mapping_generation/llm_chain.py
index 14f2eb5..37e9097 100644
--- a/backend/src/mapping_generation/llm_chain.py
+++ b/backend/src/mapping_generation/llm_chain.py
@@ -493,7 +493,7 @@ def extract_information(query, model_name=LLM_ID, prompt=None):
             )
             result["rel"] = rel
             result["full_query"] = query
-            print(f"extract_information result={result}")
+            print(f"extract information result after fixing={result}")
             return QueryDecomposedModel(**result)
     except ValidationError as e:
@@ -507,7 +507,7 @@ def extract_information(query, model_name=LLM_ID, prompt=None):
         )
         result["rel"] = rel
         result["full_query"] = query
-        print(f"extract_information result={result}")
+        print(f"extract information result={result}")
         return QueryDecomposedModel(**result)
     # except Exception as e:
     #     logger.info(f"Error in prompt:{e}")
@@ -928,7 +928,7 @@ def pass_to_chat_llm_chain(
     link_predictions_results = []
     for _ in range(n_prompts):  # Assume n_prompts is 3
-        ranking_prompt = generate_ranking_prompt(query=query,domain=domain,in_context=False)
+        ranking_prompt = generate_ranking_prompt(query=query,domain=domain,in_context=True)
         ranking_results = get_llm_results(prompt=ranking_prompt, query=query, documents=documents, llm=model,llm_name=llm_name)
         if ranking_results:
             ranking_scores.extend(ranking_results)
@@ -938,7 +938,7 @@
             logger.info(f"Exact match found in Ranking: {result['answer']} = {exact_match_found_rank}. Does it exist in original documents={result['answer'] in documents}")
     link_predictions_results = []
     if prompt_stage == 2:
-        link_prediction_prompt = generate_link_prediction_prompt(query, documents,domain=domain,in_context=False)
+        link_prediction_prompt = generate_link_prediction_prompt(query, documents,domain=domain,in_context=True)
         lp_results = get_llm_results(prompt=link_prediction_prompt, query=query, documents=documents, llm=model,llm_name=llm_name)
         if lp_results:
             for res in lp_results:
diff --git a/backend/src/mapping_generation/manager_llm.py b/backend/src/mapping_generation/manager_llm.py
index 746bb0c..a003d11 100644
--- a/backend/src/mapping_generation/manager_llm.py
+++ b/backend/src/mapping_generation/manager_llm.py
@@ -229,6 +229,7 @@ def get_example_selector(context_key: str, examples: List[Dict[str, str]], k=4,
         if context_key not in ExampleSelectorManager._selectors:
             try:
                 if selector_path is None:
+                    selector_path = f'../data/faiss_index_{context_key}'
                     os.makedirs(os.path.dirname(selector_path), exist_ok=True)  # Create the directory if it doesn't exist
                 # Initialize the embeddings
diff --git a/backend/src/mapping_generation/utils.py b/backend/src/mapping_generation/utils.py
index e742a37..877e7d8 100644
--- a/backend/src/mapping_generation/utils.py
+++ b/backend/src/mapping_generation/utils.py
@@ -217,6 +217,8 @@ def save_json_data(file_path, data):
 def init_logger(log_file_path=LOG_FILE) -> logging.Logger:
     # Create a logger
+    if not os.path.exists(os.path.dirname(log_file_path)):
+        os.makedirs(os.path.dirname(log_file_path))
     logger = logging.getLogger(__name__)
     logger.setLevel(logging.DEBUG)  # Set the logging level to DEBUG
     # Create a file handler
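
The new langchain_ollama entry in pyproject.toml suggests the mapping-generation backend can now target a locally served Ollama model. A minimal usage sketch, assuming a running Ollama daemon and an already pulled model named "llama3.1" (both are assumptions, not taken from this patch):

    # Assumes `pip install langchain-ollama` and a local Ollama daemon with the model pulled.
    from langchain_ollama import ChatOllama

    llm = ChatOllama(model="llama3.1", temperature=0)  # model name is an assumption
    response = llm.invoke("Return the standard vocabulary term that best matches: heart attack")
    print(response.content)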
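
In llm_chain.py both prompt builders are now called with in_context=True, so ranking and link prediction run with few-shot examples rather than zero-shot instructions. generate_ranking_prompt itself is not part of this patch; the sketch below only illustrates what such a flag typically toggles, with made-up example pairs:

    from typing import List


    def generate_ranking_prompt_sketch(query: str, domain: str, in_context: bool = True) -> str:
        """Illustration only: prepend worked examples when in_context is True."""
        instructions = (
            f"You rank candidate {domain} concepts by how well they match the query.\n"
            "Return the candidates ordered from best to worst match.\n\n"
        )
        examples: List[str] = [
            "Query: chest pain -> Best match: Chest pain (finding)",
            "Query: high blood sugar -> Best match: Hyperglycemia",
        ]
        few_shot = "Examples:\n" + "\n".join(examples) + "\n\n" if in_context else ""
        return instructions + few_shot + f"Query: {query}\nCandidates:"


    print(generate_ranking_prompt_sketch("myocardial infarction", domain="condition"))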
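
The manager_llm.py hunk supplies a default selector_path (../data/faiss_index_<context_key>) when the caller passes None, so os.makedirs no longer receives os.path.dirname(None). A standalone sketch of that guard; the _selectors cache and build_selector placeholder below stand in for the repo's ExampleSelectorManager internals and are not from the patch:

    import os
    from typing import Dict, List, Optional

    _selectors: Dict[str, dict] = {}  # stand-in for ExampleSelectorManager._selectors


    def build_selector(examples: List[Dict[str, str]], k: int, index_path: str) -> dict:
        # Placeholder for the real FAISS-backed example selector construction.
        return {"examples": examples, "k": k, "index_path": index_path}


    def get_example_selector_sketch(context_key: str,
                                    examples: List[Dict[str, str]],
                                    k: int = 4,
                                    selector_path: Optional[str] = None) -> dict:
        """Sketch: default the index path per context key, then build or reuse a selector."""
        if context_key in _selectors:
            return _selectors[context_key]
        if selector_path is None:
            # Same convention as the patch: one FAISS index directory per context key.
            selector_path = f"../data/faiss_index_{context_key}"
        os.makedirs(os.path.dirname(selector_path), exist_ok=True)  # creates ../data if missing
        selector = build_selector(examples, k=k, index_path=selector_path)
        _selectors[context_key] = selector
        return selector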
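
The utils.py hunk creates the log directory before init_logger attaches its file handler, so logging.FileHandler no longer fails on a fresh checkout where the directory does not exist yet. A self-contained sketch of the same idea; the default path "logs/app.log" is an assumption, since the repo's LOG_FILE constant is not shown in this diff:

    import logging
    import os


    def init_logger_sketch(log_file_path: str = "logs/app.log") -> logging.Logger:
        """Sketch: make sure the log directory exists before attaching a FileHandler."""
        log_dir = os.path.dirname(log_file_path)
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)  # mirrors the patch; exist_ok=True would also work

        logger = logging.getLogger("mapping_generation")
        logger.setLevel(logging.DEBUG)
        if not logger.handlers:  # avoid stacking handlers on repeated calls
            handler = logging.FileHandler(log_file_path)
            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
            logger.addHandler(handler)
        return logger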