# gpt.py
import json
import os
import re
import textwrap
from datetime import datetime
from typing import Dict, List

from dotenv import load_dotenv
from langchain_core.messages.ai import AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompt_values import StringPromptValue
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from Levenshtein import distance

import strings

load_dotenv()

class LLMLogger:
    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    @staticmethod
    def log_request(prompts, parsed_reply: Dict[str, Dict]):
        calls_log = os.path.join(os.getcwd(), "open_ai_calls.json")
        if isinstance(prompts, StringPromptValue):
            prompts = prompts.text
        elif isinstance(prompts, dict):
            # Already a plain dict of prompts; log it as-is.
            pass
        else:
            # Assume a ChatPromptValue-like object exposing `.messages`.
            prompts = {
                f"prompt_{i + 1}": prompt.content
                for i, prompt in enumerate(prompts.messages)
            }
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Extract token usage details from the response
        token_usage = parsed_reply["usage_metadata"]
        output_tokens = token_usage["output_tokens"]
        input_tokens = token_usage["input_tokens"]
        total_tokens = token_usage["total_tokens"]
        # Extract model details from the response
        model_name = parsed_reply["response_metadata"]["model_name"]
        # Per-token prices; these correspond to gpt-4o-mini's published rates
        # ($0.15 per 1M input tokens, $0.60 per 1M output tokens).
        prompt_price_per_token = 0.00000015
        completion_price_per_token = 0.0000006
        # Calculate the total cost of the API call
        total_cost = (input_tokens * prompt_price_per_token) + (
            output_tokens * completion_price_per_token
        )
        # Create a log entry with all relevant information
        log_entry = {
            "model": model_name,
            "time": current_time,
            "prompts": prompts,
            "replies": parsed_reply["content"],  # Response content
            "total_tokens": total_tokens,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "total_cost": total_cost,
        }
        # Append the log entry to the log file as pretty-printed JSON
        with open(calls_log, "a", encoding="utf-8") as f:
            json_string = json.dumps(log_entry, ensure_ascii=False, indent=4)
            f.write(json_string + "\n")
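
# An entry appended to open_ai_calls.json looks roughly like this
# (values are illustrative, not captured from a real call):
#
#     {
#         "model": "gpt-4o-mini",
#         "time": "2024-01-01 12:00:00",
#         "prompts": {"prompt_1": "Summarize the following job description ..."},
#         "replies": "The role is a senior backend position ...",
#         "total_tokens": 450,
#         "input_tokens": 400,
#         "output_tokens": 50,
#         "total_cost": 0.00009
#     }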

class LoggerChatModel:
    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    def __call__(self, messages: List[Dict[str, str]]) -> AIMessage:
        # Call the LLM with the provided messages and log the exchange.
        # `invoke` replaces the deprecated direct call on ChatOpenAI.
        reply = self.llm.invoke(messages)
        parsed_reply = self.parse_llmresult(reply)
        LLMLogger.log_request(prompts=messages, parsed_reply=parsed_reply)
        return reply

    def parse_llmresult(self, llmresult: AIMessage) -> Dict[str, Dict]:
        # Flatten the AIMessage into a plain dict suitable for JSON logging.
        content = llmresult.content
        response_metadata = llmresult.response_metadata
        id_ = llmresult.id
        # usage_metadata can be None when the provider omits token counts.
        usage_metadata = llmresult.usage_metadata or {}
        parsed_result = {
            "content": content,
            "response_metadata": {
                "model_name": response_metadata.get("model_name", ""),
                "system_fingerprint": response_metadata.get("system_fingerprint", ""),
                "finish_reason": response_metadata.get("finish_reason", ""),
                "logprobs": response_metadata.get("logprobs", None),
            },
            "id": id_,
            "usage_metadata": {
                "input_tokens": usage_metadata.get("input_tokens", 0),
                "output_tokens": usage_metadata.get("output_tokens", 0),
                "total_tokens": usage_metadata.get("total_tokens", 0),
            },
        }
        return parsed_result

class GPTAnswerer:
    def __init__(self, openai_api_key):
        self.llm_cheap = LoggerChatModel(
            ChatOpenAI(
                model_name="gpt-4o-mini", openai_api_key=openai_api_key, temperature=0.8
            )
        )

    @property
    def job_description(self):
        return self.job.description

    @staticmethod
    def find_best_match(text: str, options: list[str]) -> str:
        distances = [
            (option, distance(text.lower(), option.lower())) for option in options
        ]
        best_option = min(distances, key=lambda x: x[1])[0]
        return best_option
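
    # Example (illustrative, not from the repo): with options ["Yes", "No"],
    # a model reply of "yes." resolves to "Yes", the option with the smaller
    # Levenshtein distance:
    #
    #     GPTAnswerer.find_best_match("yes.", ["Yes", "No"])  # -> "Yes"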

    @staticmethod
    def _remove_placeholders(text: str) -> str:
        text = text.replace("PLACEHOLDER", "")
        return text.strip()

    @staticmethod
    def _preprocess_template_string(template: str) -> str:
        # Remove common leading indentation from a template string.
        return textwrap.dedent(template)

    def set_resume(self, resume):
        self.resume = resume

    def set_job(self, job):
        self.job = job
        self.job.set_summarize_job_description(
            self.summarize_job_description(self.job.description)
        )

    def summarize_job_description(self, text: str) -> str:
        # Dedent into a local variable instead of mutating the shared
        # `strings` module attribute on every call.
        template = self._preprocess_template_string(strings.summarize_prompt_template)
        prompt = ChatPromptTemplate.from_template(template)
        chain = prompt | self.llm_cheap | StrOutputParser()
        output = chain.invoke({"text": text})
        return output

    def get_resume_html(self):
        resume_markdown_prompt = ChatPromptTemplate.from_template(strings.resume_markdown_template)
        fusion_job_description_resume_prompt = ChatPromptTemplate.from_template(strings.fusion_job_description_resume_template)
        resume_markdown_chain = resume_markdown_prompt | self.llm_cheap | StrOutputParser()
        fusion_job_description_resume_chain = fusion_job_description_resume_prompt | self.llm_cheap | StrOutputParser()
        # Resolve the static assets referenced by the HTML template.
        casual_markdown_path = os.path.abspath("resume_template/casual_markdown.js")
        reorganize_header_path = os.path.abspath("resume_template/reorganizeHeader.js")
        resume_css_path = os.path.abspath("resume_template/resume.css")
        html_template = strings.html_template.format(
            casual_markdown=casual_markdown_path,
            reorganize_header=reorganize_header_path,
            resume_css=resume_css_path,
        )
        # Render the resume as markdown, fuse it with the job description
        # summary, then prepend the HTML boilerplate.
        composed_chain = (
            resume_markdown_chain
            | (lambda output: {"job_description": self.job.summarize_job_description, "formatted_resume": output})
            | fusion_job_description_resume_chain
            | (lambda formatted_resume: html_template + formatted_resume)
        )
        try:
            output = composed_chain.invoke({
                "resume": self.resume,
                "job_description": self.job.summarize_job_description,
            })
            return output
        except Exception as e:
            # Surface the failure instead of silently swallowing it and
            # returning None implicitly.
            print(f"Error generating resume HTML: {e}")
            return None

    def _create_chain(self, template: str):
        prompt = ChatPromptTemplate.from_template(template)
        return prompt | self.llm_cheap | StrOutputParser()

    def answer_question_textual_wide_range(self, question: str) -> str:
        # Define chains for each section of the resume
        chains = {
            "personal_information": self._create_chain(strings.personal_information_template),
            "self_identification": self._create_chain(strings.self_identification_template),
            "legal_authorization": self._create_chain(strings.legal_authorization_template),
            "work_preferences": self._create_chain(strings.work_preferences_template),
            "education_details": self._create_chain(strings.education_details_template),
            "experience_details": self._create_chain(strings.experience_details_template),
            "projects": self._create_chain(strings.projects_template),
            "availability": self._create_chain(strings.availability_template),
            "salary_expectations": self._create_chain(strings.salary_expectations_template),
            "certifications": self._create_chain(strings.certifications_template),
            "languages": self._create_chain(strings.languages_template),
            "interests": self._create_chain(strings.interests_template),
            "cover_letter": self._create_chain(strings.coverletter_template),
        }
        # Use a {question} placeholder rather than an f-string so braces in
        # the question text cannot break the prompt template.
        section_prompt = (
            "For the following question: '{question}', which section of the resume is relevant? "
            "Respond with one of the following: Personal information, Self Identification, Legal Authorization, "
            "Work Preferences, Education Details, Experience Details, Projects, Availability, Salary Expectations, "
            "Certifications, Languages, Interests, Cover letter"
        )
        prompt = ChatPromptTemplate.from_template(section_prompt)
        chain = prompt | self.llm_cheap | StrOutputParser()
        output = chain.invoke({"question": question})
        section_name = output.strip().lower().replace(" ", "_")
        if section_name == "cover_letter":
            chain = chains.get(section_name)
            output = chain.invoke({"resume": self.resume, "job_description": self.job_description})
            return output
        resume_section = getattr(self.resume, section_name, None)
        if resume_section is None:
            raise ValueError(f"Section '{section_name}' not found in the resume.")
        chain = chains.get(section_name)
        if chain is None:
            raise ValueError(f"Chain not defined for section '{section_name}'")
        return chain.invoke({"resume_section": resume_section, "question": question})
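
    # Example routing (illustrative): for "Are you legally authorized to work
    # in the US?", the classifier is expected to answer "Legal Authorization",
    # which maps to the "legal_authorization" chain and the resume attribute
    # of the same name.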

    def answer_question_textual(self, question: str) -> str:
        template = self._preprocess_template_string(strings.resume_stuff_template)
        prompt = ChatPromptTemplate.from_template(template)
        chain = prompt | self.llm_cheap | StrOutputParser()
        output = chain.invoke({"resume": self.resume, "question": question})
        return output

    def answer_question_numeric(self, question: str, default_experience: int = 3) -> int:
        func_template = self._preprocess_template_string(strings.numeric_question_template)
        prompt = ChatPromptTemplate.from_template(func_template)
        chain = prompt | self.llm_cheap | StrOutputParser()
        output_str = chain.invoke({"resume": self.resume, "question": question, "default_experience": default_experience})
        try:
            output = self.extract_number_from_string(output_str)
        except ValueError:
            # Fall back to the default when the model reply contains no digits.
            output = default_experience
        return output

    def extract_number_from_string(self, output_str):
        numbers = re.findall(r"\d+", output_str)
        if numbers:
            return int(numbers[0])
        else:
            raise ValueError("No numbers found in the string")

    def answer_question_from_options(self, question: str, options: list[str]) -> str:
        func_template = self._preprocess_template_string(strings.options_template)
        prompt = ChatPromptTemplate.from_template(func_template)
        chain = prompt | self.llm_cheap | StrOutputParser()
        output_str = chain.invoke({"resume": self.resume, "question": question, "options": options})
        best_option = self.find_best_match(output_str, options)
        return best_option
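

# Minimal usage sketch (illustrative only, not part of the original module):
# `FakeJob` is a hypothetical stand-in for the project's real Job object and
# the resume is passed as plain text; the real Resume/Job classes live
# elsewhere in the repo. Requires OPENAI_API_KEY in the environment.
if __name__ == "__main__":
    class FakeJob:
        description = "Backend engineer, Python, 5+ years of experience, remote-friendly."

        def set_summarize_job_description(self, summary):
            self.summarize_job_description = summary

    answerer = GPTAnswerer(openai_api_key=os.environ["OPENAI_API_KEY"])
    answerer.set_resume("Jane Doe. Python developer, 6 years of experience ...")
    answerer.set_job(FakeJob())  # triggers one summarization call
    print(answerer.answer_question_numeric("How many years of Python experience do you have?"))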