Skip to content

Commit

Permalink
removed cohere dependency , update codes . (srbhr#261)
Browse files Browse the repository at this point in the history
  • Loading branch information
SubramanyamChalla24 authored May 15, 2024
1 parent 0034243 commit a1cc170
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 59 deletions.
28 changes: 0 additions & 28 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,34 +167,6 @@ The full stack Next.js (React and FastAPI) web application allows users to inter

To run the full stack web application (frontend client and backend api servers), follow the instructions over on the [webapp README](/webapp/README.md) file.

<br/>

### Cohere and Qdrant

1. Visit [Cohere website registration](https://dashboard.cohere.ai/welcome/register) and create an account.
2. Go to API keys and copy your cohere api key.
3. Visit [Qdrant website](https://cloud.qdrant.io/) and create an account.
4. Get your api key and cluster url.
5. Go to open dashboard in qdrant and enter your api key **for only the first time**

<img src="Assets/img/quadrant_cloud.png" height="60%" width="60%"/>

6. Now create a yaml file named config.yml in Scripts/Similarity/ folder.

7. The format for the conifg file should be as below:
```yaml
cohere:
api_key: cohere_key
qdrant:
api_key: qdrant_api_key
url: qdrant_cluster_url
```
8. Please replace your values without any quotes.

*Note: Please make sure that Qdrant_client's version is higher than v1.1*
<br/>
## Code Formatting

This project uses [Black](https://black.readthedocs.io/en/stable/) for code formatting. We believe this helps to keep the code base consistent and reduces the cognitive load when reading code.
Expand Down
30 changes: 0 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,35 +198,6 @@ To run the full stack web application (frontend client and backend api servers),
5. Copy the url and open it in your browser.
6. ![img_2.png](img_2.png)

### Cohere and Qdrant

1. Visit [Cohere website registration](https://dashboard.cohere.ai/welcome/register) and create an account.
2. Go to API keys and copy your cohere api key.
3. Visit [Qdrant website](https://cloud.qdrant.io/) and create an account.
4. Get your api key and cluster url.
5. Go to open dashboard in qdrant and enter your api key **for only the first time**
<img src="Assets/img/quadrant_cloud.png" height="60%" width="60%"/>
1. Now create a yaml file named config.yml in Scripts/Similarity/ folder.
2. The format for the conifg file should be as below:
```yaml
cohere:
api_key: cohere_key
qdrant:
api_key: qdrant_api_key
url: qdrant_cluster_url
```
3. Please replace your values without any quotes.

*Note: Please make sure that Qdrant_client's version is higher than v1.1*
*Note: This part needs updating w.r.t to the new FastEmbed changes.*
<br/>
<div align="center">
## Code Formatting

This project uses [Black](https://black.readthedocs.io/en/stable/) for code formatting. We believe this helps to keep the code base consistent and reduces the cognitive load when reading code.
Expand All @@ -253,7 +224,6 @@ Now, the pre-commit hooks will automatically run every time you commit your chan
## Join Us, Contribute!
</div>
Pull Requests & Issues are not just welcomed, they're celebrated! Let's create together.
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,4 @@ reportlab==3.6.13
easygui==0.98.3
xhtml2pdf==0.2.11
fastembed~=0.2.2
cohere==5.3.2
qdrant-client==1.8.2
226 changes: 226 additions & 0 deletions scripts/similarity/get_score.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-05-14T22:31:25.398358Z",
"start_time": "2024-05-14T22:31:22.567855Z"
}
},
"source": [
"import json\n",
"import logging\n",
"import os\n",
"\n",
"import yaml\n",
"from qdrant_client import QdrantClient\n",
"from scripts.utils.logger import init_logging_config\n"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001B[32m2024-05-14 15:31:22.994\u001B[0m | \u001B[33m\u001B[1mWARNING \u001B[0m | \u001B[36mfastembed.embedding\u001B[0m:\u001B[36m<module>\u001B[0m:\u001B[36m7\u001B[0m - \u001B[33m\u001B[1mDefaultEmbedding, FlagEmbedding, JinaEmbedding are deprecated.Use from fastembed import TextEmbedding instead.\u001B[0m\n"
]
}
],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-05-14T22:31:28.571615Z",
"start_time": "2024-05-14T22:31:28.569033Z"
}
},
"cell_type": "code",
"source": [
"init_logging_config(basic_log_level=logging.INFO)\n",
"# Get the logger\n",
"logger = logging.getLogger(__name__)\n",
"\n",
"# Set the logging level\n",
"logger.setLevel(logging.INFO)"
],
"id": "a69d1b16785bf17b",
"outputs": [],
"execution_count": 2
},
{
"cell_type": "code",
"source": [
"def find_path(folder_name):\n",
" curr_dir = os.getcwd()\n",
" while True:\n",
" if folder_name in os.listdir(curr_dir):\n",
" return os.path.join(curr_dir, folder_name)\n",
" else:\n",
" parent_dir = os.path.dirname(curr_dir)\n",
" if parent_dir == \"/\":\n",
" break\n",
" curr_dir = parent_dir\n",
" raise ValueError(f\"Folder '{folder_name}' not found.\")\n",
"\n",
"\n",
"def read_config(filepath):\n",
" try:\n",
" with open(filepath) as f:\n",
" config = yaml.safe_load(f)\n",
" return config\n",
" except FileNotFoundError as e:\n",
" logger.error(f\"Configuration file {filepath} not found: {e}\")\n",
" except yaml.YAMLError as e:\n",
" logger.error(\n",
" f\"Error parsing YAML in configuration file {filepath}: {e}\", exc_info=True\n",
" )\n",
" except Exception as e:\n",
" logger.error(f\"Error reading configuration file {filepath}: {e}\")\n",
" return None\n",
"\n",
"\n",
"def read_doc(path):\n",
" with open(path) as f:\n",
" try:\n",
" data = json.load(f)\n",
" except Exception as e:\n",
" logger.error(f\"Error reading JSON file: {e}\")\n",
" data = {}\n",
" return data\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-05-14T22:31:31.783421Z",
"start_time": "2024-05-14T22:31:31.763842Z"
}
},
"id": "899904f870d3f720",
"outputs": [],
"execution_count": 3
},
{
"cell_type": "code",
"source": [
"def get_score(resume_string, job_description_string):\n",
" logger.info(\"Started getting similarity score\")\n",
"\n",
" documents = [resume_string]\n",
" client = QdrantClient(\":memory:\")\n",
" client.set_model(\"BAAI/bge-base-en\")\n",
"\n",
" client.add(\n",
" collection_name=\"demo_collection\",\n",
" documents=documents,\n",
" )\n",
"\n",
" search_result = client.query(\n",
" collection_name=\"demo_collection\", query_text=job_description_string\n",
" )\n",
" logger.info(\"Finished getting similarity score\")\n",
" return search_result"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-05-14T22:31:33.976354Z",
"start_time": "2024-05-14T22:31:33.972943Z"
}
},
"id": "c54a86727f436747",
"outputs": [],
"execution_count": 4
},
{
"cell_type": "code",
"source": [
"cwd = find_path('Resume-Matcher')\n",
"READ_RESUME_FROM = os.path.join(cwd, 'Data', 'Processed', 'Resumes')\n",
"READ_JOB_DESCRIPTION_FROM = os.path.join(cwd, 'Data', 'Processed', 'JobDescription')\n",
"config_path = os.path.join(cwd, \"scripts\", \"similarity\")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-05-14T22:31:40.945947Z",
"start_time": "2024-05-14T22:31:40.934732Z"
}
},
"id": "ae15b96094e7c460",
"outputs": [],
"execution_count": 5
},
{
"cell_type": "code",
"source": [
"# To give your custom resume use this code\n",
"resume_dict = read_config(\n",
" READ_RESUME_FROM\n",
" + \"/Resume-alfred_pennyworth_pm.pdf83632b66-5cce-4322-a3c6-895ff7e3dd96.json\"\n",
")\n",
"job_dict = read_config(\n",
" READ_JOB_DESCRIPTION_FROM\n",
" + \"/JobDescription-job_desc_product_manager.pdf6763dc68-12ff-4b32-b652-ccee195de071.json\"\n",
")\n",
"resume_keywords = resume_dict[\"extracted_keywords\"]\n",
"job_description_keywords = job_dict[\"extracted_keywords\"]\n",
"\n",
"resume_string = \" \".join(resume_keywords)\n",
"jd_string = \" \".join(job_description_keywords)\n",
"final_result = get_score(resume_string, jd_string)\n",
"for r in final_result:\n",
" print(r.score)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-05-14T22:32:29.666678Z",
"start_time": "2024-05-14T22:32:28.781841Z"
}
},
"id": "2f531db076cc1f59",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001B[32;10m2024-05-14 15:32:28,825 (1835371807.py:2) - INFO: \u001B[0mStarted getting similarity score\n",
"\u001B[32;10m2024-05-14 15:32:29,664 (1835371807.py:16) - INFO: \u001B[0mFinished getting similarity score\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8572231574198962\n"
]
}
],
"execution_count": 6
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit a1cc170

Please sign in to comment.