forked from srbhr/Resume-Matcher
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
removed cohere dependency , update codes . (srbhr#261)
- Loading branch information
1 parent
0034243
commit a1cc170
Showing
4 changed files
with
226 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,5 +113,4 @@ reportlab==3.6.13 | |
easygui==0.98.3 | ||
xhtml2pdf==0.2.11 | ||
fastembed~=0.2.2 | ||
cohere==5.3.2 | ||
qdrant-client==1.8.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"id": "initial_id", | ||
"metadata": { | ||
"collapsed": true, | ||
"ExecuteTime": { | ||
"end_time": "2024-05-14T22:31:25.398358Z", | ||
"start_time": "2024-05-14T22:31:22.567855Z" | ||
} | ||
}, | ||
"source": [ | ||
"import json\n", | ||
"import logging\n", | ||
"import os\n", | ||
"\n", | ||
"import yaml\n", | ||
"from qdrant_client import QdrantClient\n", | ||
"from scripts.utils.logger import init_logging_config\n" | ||
], | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"\u001B[32m2024-05-14 15:31:22.994\u001B[0m | \u001B[33m\u001B[1mWARNING \u001B[0m | \u001B[36mfastembed.embedding\u001B[0m:\u001B[36m<module>\u001B[0m:\u001B[36m7\u001B[0m - \u001B[33m\u001B[1mDefaultEmbedding, FlagEmbedding, JinaEmbedding are deprecated.Use from fastembed import TextEmbedding instead.\u001B[0m\n" | ||
] | ||
} | ||
], | ||
"execution_count": 1 | ||
}, | ||
{ | ||
"metadata": { | ||
"ExecuteTime": { | ||
"end_time": "2024-05-14T22:31:28.571615Z", | ||
"start_time": "2024-05-14T22:31:28.569033Z" | ||
} | ||
}, | ||
"cell_type": "code", | ||
"source": [ | ||
"init_logging_config(basic_log_level=logging.INFO)\n", | ||
"# Get the logger\n", | ||
"logger = logging.getLogger(__name__)\n", | ||
"\n", | ||
"# Set the logging level\n", | ||
"logger.setLevel(logging.INFO)" | ||
], | ||
"id": "a69d1b16785bf17b", | ||
"outputs": [], | ||
"execution_count": 2 | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"def find_path(folder_name):\n", | ||
" curr_dir = os.getcwd()\n", | ||
" while True:\n", | ||
" if folder_name in os.listdir(curr_dir):\n", | ||
" return os.path.join(curr_dir, folder_name)\n", | ||
" else:\n", | ||
" parent_dir = os.path.dirname(curr_dir)\n", | ||
" if parent_dir == \"/\":\n", | ||
" break\n", | ||
" curr_dir = parent_dir\n", | ||
" raise ValueError(f\"Folder '{folder_name}' not found.\")\n", | ||
"\n", | ||
"\n", | ||
"def read_config(filepath):\n", | ||
" try:\n", | ||
" with open(filepath) as f:\n", | ||
" config = yaml.safe_load(f)\n", | ||
" return config\n", | ||
" except FileNotFoundError as e:\n", | ||
" logger.error(f\"Configuration file {filepath} not found: {e}\")\n", | ||
" except yaml.YAMLError as e:\n", | ||
" logger.error(\n", | ||
" f\"Error parsing YAML in configuration file {filepath}: {e}\", exc_info=True\n", | ||
" )\n", | ||
" except Exception as e:\n", | ||
" logger.error(f\"Error reading configuration file {filepath}: {e}\")\n", | ||
" return None\n", | ||
"\n", | ||
"\n", | ||
"def read_doc(path):\n", | ||
" with open(path) as f:\n", | ||
" try:\n", | ||
" data = json.load(f)\n", | ||
" except Exception as e:\n", | ||
" logger.error(f\"Error reading JSON file: {e}\")\n", | ||
" data = {}\n", | ||
" return data\n" | ||
], | ||
"metadata": { | ||
"collapsed": false, | ||
"ExecuteTime": { | ||
"end_time": "2024-05-14T22:31:31.783421Z", | ||
"start_time": "2024-05-14T22:31:31.763842Z" | ||
} | ||
}, | ||
"id": "899904f870d3f720", | ||
"outputs": [], | ||
"execution_count": 3 | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"def get_score(resume_string, job_description_string):\n", | ||
" logger.info(\"Started getting similarity score\")\n", | ||
"\n", | ||
" documents = [resume_string]\n", | ||
" client = QdrantClient(\":memory:\")\n", | ||
" client.set_model(\"BAAI/bge-base-en\")\n", | ||
"\n", | ||
" client.add(\n", | ||
" collection_name=\"demo_collection\",\n", | ||
" documents=documents,\n", | ||
" )\n", | ||
"\n", | ||
" search_result = client.query(\n", | ||
" collection_name=\"demo_collection\", query_text=job_description_string\n", | ||
" )\n", | ||
" logger.info(\"Finished getting similarity score\")\n", | ||
" return search_result" | ||
], | ||
"metadata": { | ||
"collapsed": false, | ||
"ExecuteTime": { | ||
"end_time": "2024-05-14T22:31:33.976354Z", | ||
"start_time": "2024-05-14T22:31:33.972943Z" | ||
} | ||
}, | ||
"id": "c54a86727f436747", | ||
"outputs": [], | ||
"execution_count": 4 | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"cwd = find_path('Resume-Matcher')\n", | ||
"READ_RESUME_FROM = os.path.join(cwd, 'Data', 'Processed', 'Resumes')\n", | ||
"READ_JOB_DESCRIPTION_FROM = os.path.join(cwd, 'Data', 'Processed', 'JobDescription')\n", | ||
"config_path = os.path.join(cwd, \"scripts\", \"similarity\")" | ||
], | ||
"metadata": { | ||
"collapsed": false, | ||
"ExecuteTime": { | ||
"end_time": "2024-05-14T22:31:40.945947Z", | ||
"start_time": "2024-05-14T22:31:40.934732Z" | ||
} | ||
}, | ||
"id": "ae15b96094e7c460", | ||
"outputs": [], | ||
"execution_count": 5 | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# To give your custom resume use this code\n", | ||
"resume_dict = read_config(\n", | ||
" READ_RESUME_FROM\n", | ||
" + \"/Resume-alfred_pennyworth_pm.pdf83632b66-5cce-4322-a3c6-895ff7e3dd96.json\"\n", | ||
")\n", | ||
"job_dict = read_config(\n", | ||
" READ_JOB_DESCRIPTION_FROM\n", | ||
" + \"/JobDescription-job_desc_product_manager.pdf6763dc68-12ff-4b32-b652-ccee195de071.json\"\n", | ||
")\n", | ||
"resume_keywords = resume_dict[\"extracted_keywords\"]\n", | ||
"job_description_keywords = job_dict[\"extracted_keywords\"]\n", | ||
"\n", | ||
"resume_string = \" \".join(resume_keywords)\n", | ||
"jd_string = \" \".join(job_description_keywords)\n", | ||
"final_result = get_score(resume_string, jd_string)\n", | ||
"for r in final_result:\n", | ||
" print(r.score)" | ||
], | ||
"metadata": { | ||
"collapsed": false, | ||
"ExecuteTime": { | ||
"end_time": "2024-05-14T22:32:29.666678Z", | ||
"start_time": "2024-05-14T22:32:28.781841Z" | ||
} | ||
}, | ||
"id": "2f531db076cc1f59", | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"\u001B[32;10m2024-05-14 15:32:28,825 (1835371807.py:2) - INFO: \u001B[0mStarted getting similarity score\n", | ||
"\u001B[32;10m2024-05-14 15:32:29,664 (1835371807.py:16) - INFO: \u001B[0mFinished getting similarity score\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"0.8572231574198962\n" | ||
] | ||
} | ||
], | ||
"execution_count": 6 | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |