Skip to content

Commit

Permalink
add resume key value extract API
Browse files Browse the repository at this point in the history
  • Loading branch information
Sdddell committed Nov 1, 2024
1 parent 34a7afd commit 7f137d1
Show file tree
Hide file tree
Showing 3 changed files with 602 additions and 0 deletions.
101 changes: 101 additions & 0 deletions any_parser/any_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def __init__(self, api_key: str, base_url: str = PUBLIC_SHARED_BASE_URL) -> None
"""
self._sync_extract_url = f"{base_url}/extract"
self._sync_json_url = f"{base_url}/json/extract"
self._sync_resume_url = f"{base_url}/resume/extract"
self._sync_refined_url = f"{base_url}/refined_parse"
self._async_upload_url = f"{base_url}/async/upload"
self._async_fetch_url = f"{base_url}/async/fetch"
Expand Down Expand Up @@ -188,6 +189,66 @@ def extract_key_value(
else:
return f"Error: {response.status_code} {response.text}", None

def resume_extract(
    self,
    file_path: str,
) -> Tuple[str, str]:
    """Extract structured resume data from a file in real-time.

    The file is read, base64-encoded and POSTed as JSON to the synchronous
    resume endpoint (``self._sync_resume_url``).

    Args:
        file_path (str): The path to the file to be parsed.

    Returns:
        tuple: ``(result, timing)`` on success, where ``result`` is the
            value of the ``"extraction_result"`` field of the JSON response
            and ``timing`` is a ``"Time Elapsed: N.NN seconds"`` string.
            On any failure (file check, non-200 status, invalid JSON or a
            response without ``"extraction_result"``) the tuple is
            ``(error_message, None)``.

            The extracted data includes:
            - "education": Education
            - "work_experience": Work Experience
            - "personal_info": Personal Information
            - "skills": Skills
            - "certifications": Certifications
            - "projects": Projects
            - "pii": Personally Identifiable Information - includes only
              name, email, and phone
    """
    file_extension = Path(file_path).suffix.lower().lstrip(".")

    # Check if the file exists and file_type
    error = check_file_type_and_path(file_path, file_extension)
    if error:
        return error, None

    # Encode the file content in base64
    with open(file_path, "rb") as file:
        encoded_file = base64.b64encode(file.read()).decode("utf-8")

    # Create the JSON payload
    payload = {
        "file_content": encoded_file,
        "file_type": file_extension,
    }

    # Send the POST request; only the request itself is timed.
    start_time = time.time()
    response = requests.post(
        self._sync_resume_url,
        headers=self._headers,
        data=json.dumps(payload),
        timeout=TIMEOUT,
    )
    end_time = time.time()

    # Check if the request was successful
    if response.status_code != 200:
        return f"Error: {response.status_code} {response.text}", None

    try:
        response_data = response.json()
    except json.JSONDecodeError:
        return f"Error: Invalid JSON response: {response.text}", None

    # A 200 body that is valid JSON can still lack the expected field (or
    # not be an object at all); report that the same way as every other
    # failure instead of letting KeyError/TypeError escape to the caller.
    try:
        result = response_data["extraction_result"]
    except (KeyError, TypeError):
        return f"Error: Unexpected response format: {response.text}", None

    return (
        result,
        f"Time Elapsed: {end_time - start_time:.2f} seconds",
    )

def async_extract(
self,
file_path: str,
Expand Down Expand Up @@ -289,6 +350,44 @@ def async_extract_key_value(
# If response successful, upload the file
return upload_file_to_presigned_url(file_path, response)

def async_extract_resume_key_value(
    self,
    file_path: str,
) -> str:
    """Submit a resume file for asynchronous extraction.

    Requests an upload slot from the async upload endpoint
    (``self._async_upload_url``) with ``process_type`` set to
    ``"resume_extract"``, then hands the response to
    ``upload_file_to_presigned_url`` together with the file path.

    Args:
        file_path (str): The path to the file to be parsed.

    Returns:
        str: The file id of the uploaded file (as returned by
            ``upload_file_to_presigned_url``), or the error message from
            the file check when the path or file type is rejected.
    """
    file_extension = Path(file_path).suffix.lower().lstrip(".")

    # Check if the file exists and file_type
    error = check_file_type_and_path(file_path, file_extension)

    if error:
        # Return the message alone: this method is declared ``-> str`` and
        # its result is passed directly to ``async_fetch(file_id=...)``, so
        # an ``(error, None)`` tuple here would break that contract.
        return error

    file_name = Path(file_path).name

    # Create the JSON payload
    payload = {
        "file_name": file_name,
        "process_type": "resume_extract",
    }

    # Send the POST request
    response = requests.post(
        self._async_upload_url,
        headers=self._headers,
        data=json.dumps(payload),
        timeout=TIMEOUT,
    )

    # If response successful, upload the file
    return upload_file_to_presigned_url(file_path, response)

def async_fetch(
self,
file_id: str,
Expand Down Expand Up @@ -340,6 +439,8 @@ def async_fetch(
result = response.json()
if "json" in result:
return result["json"]
elif "resume_extraction" in result:
return result["resume_extraction"]
elif "markdown" in result:
markdown_list = result["markdown"]
return "\n".join(markdown_list)
Expand Down
261 changes: 261 additions & 0 deletions examples/async_extract_resume_key_value.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install the libraries (ipython is used for displaying markdown in this demo)\n",
"# !pip3 install --upgrade ipython\n",
"# !pip3 install --upgrade any-parser"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display\n",
"from any_parser import AnyParser"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"ap = AnyParser(api_key=\"...\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"file_path = \"./sample_data/resume_1.pdf\"\n",
"file_id = ap.async_extract_resume_key_value(file_path)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Waiting for response...\n",
"Waiting for response...\n",
"Waiting for response...\n",
"Waiting for response...\n",
"Waiting for response...\n",
"Waiting for response...\n"
]
}
],
"source": [
"json_result = ap.async_fetch(file_id=file_id)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'pii': {'full_name': 'Gary Jiang',\n",
" 'email': '[email protected]',\n",
" 'phone': '+1 (213) 725-7637'},\n",
" 'education': [{'organization': 'Shenyang University of Technology',\n",
" 'degree': \"Bachelor's Degree\",\n",
" 'major': 'Computer Science',\n",
" 'start_date': '2008-01-01',\n",
" 'end_date': '2012-12-31',\n",
" 'courses': None,\n",
" 'achievements': None}],\n",
" 'work_experience': [{'job_title': 'Full Stack Developer',\n",
" 'company_name': 'VIMMERSE',\n",
" 'location': None,\n",
" 'start_date': '2023-06-01',\n",
" 'end_date': 'present',\n",
" 'job_type': None,\n",
" 'summary': 'Developed an AI-powered editor web application that brings photos to life by animating images in 3D.',\n",
" 'bullet_points': ['Developed robust back-end services utilizing Python (Flask/FastAPI) and Node.js (AWS Lambda) for efficient and scalable web applications.',\n",
" 'Built user-friendly and interactive websites using Next.js, ensuring a seamless user experience.',\n",
" 'Deployed and managed AWS infrastructure, including EC2 instances, S3 storage buckets, DynamoDB for NoSQL data management, and Cognito user pools for secure user authentication.',\n",
" 'Experienced Agile and Scrum methodologies within a fast-paced startup environment to ensure efficient project delivery and continuous improvement.',\n",
" 'Collaborated effectively with cross-functional teams (design, product management) to deliver projects on time, fostering a positive and collaborative work environment.']},\n",
" {'job_title': 'Full Stack Developer',\n",
" 'company_name': 'VIKING SASQUATCH',\n",
" 'location': None,\n",
" 'start_date': '2023-01-01',\n",
" 'end_date': '2023-06-01',\n",
" 'job_type': None,\n",
" 'summary': 'Developed APIs and Integrations for all of the parties that work on Real Estate Transactions.',\n",
" 'bullet_points': ['Connecting Mortgage, Title, and Real Estate to solve pain points and improve automation and create efficiencies.',\n",
" 'Implemented a user-friendly front-end interface using Nuxt.js, ensuring a seamless user experience.',\n",
" 'Built backend APIs utilizing Node.js serverless functions for optimal performance.',\n",
" 'Managed data storage and security by implementing a MySQL database.',\n",
" 'Collaborated effectively within a team using Agile methodologies like sprint planning, daily standups, retrospectives to ensure project delivery and continuous improvement.']},\n",
" {'job_title': 'Full Stack Developer',\n",
" 'company_name': 'ROX PAY SRL',\n",
" 'location': None,\n",
" 'start_date': '2021-12-01',\n",
" 'end_date': '2022-12-31',\n",
" 'job_type': None,\n",
" 'summary': 'Built Fintech Software House that aims to optimize B2B payments by offering a systemic solution that gives value-added services in collection of payments, financial information and corporate liquidity.',\n",
" 'bullet_points': ['Developed front-end by using React.js and Redux, Javascript/Typescript.',\n",
" 'Contributed developing backend utilizing Django/Python.']},\n",
" {'job_title': 'Freelancer',\n",
" 'company_name': 'FREELANCE',\n",
" 'location': None,\n",
" 'start_date': '2017-09-01',\n",
" 'end_date': '2021-10-31',\n",
" 'job_type': None,\n",
" 'summary': 'Developed and managed many web and mobile applications while working as freelancer at Internet Dzyns LLC company.',\n",
" 'bullet_points': ['Developed multiple web applications, participating in the whole process of their development: product design and estimation, code design and development, DevOps, UI/UX design, product launch and maintenance.',\n",
" 'Developed cross-platform mobile application using Flutter and Ionic/Angular.',\n",
" 'Developed NFT marketplace websites and wrote smart contracts.']},\n",
" {'job_title': 'Server Administrator, Java Developer',\n",
" 'company_name': 'NEUSOFT',\n",
" 'location': None,\n",
" 'start_date': '2014-06-01',\n",
" 'end_date': '2017-08-31',\n",
" 'job_type': None,\n",
" 'summary': 'Worked as intern and software developer after graduated university.',\n",
" 'bullet_points': ['Correct analytical and reasoning skills to troubleshoot and repair server issues.',\n",
" 'Operating Systems & Security Software.',\n",
" 'Java / Spring Boot / Hibernate.']}],\n",
" 'personal_info': {'name': 'GARY JIANG',\n",
" 'phone_number': '+1-213-725-7637',\n",
" 'address': None,\n",
" 'email_address': '[email protected]',\n",
" 'linkedin_url': 'linkedin.com/in/gary-jiang',\n",
" 'github_url': None,\n",
" 'summary': None},\n",
" 'skills': {'Programming Languages': ['Python',\n",
" 'PHP',\n",
" 'Javascript',\n",
" 'Typescript',\n",
" 'HTML',\n",
" 'CSS'],\n",
" 'Tools': ['Flask',\n",
" 'Django',\n",
" 'FastAPI',\n",
" 'Laravel',\n",
" 'Node.js',\n",
" 'SQL databases',\n",
" 'Next.js',\n",
" 'React',\n",
" 'Redux',\n",
" 'Nuxt.js',\n",
" 'Vue',\n",
" 'AWS Lambda',\n",
" 'Cognito',\n",
" 'EC2',\n",
" 'S3',\n",
" 'DynamoDB',\n",
" 'API Gateway',\n",
" 'Flutter',\n",
" 'Ionic',\n",
" 'Angular',\n",
" 'Git',\n",
" 'Version Control'],\n",
" 'Other': ['Startup Experience',\n",
" 'Adaptable',\n",
" 'Resourceful',\n",
" 'Prioritization',\n",
" 'Hybrid Mobile App Development',\n",
" 'AGILE',\n",
" 'SCRUM',\n",
" 'DevOps',\n",
" 'CI/CD']},\n",
" 'certifications': [],\n",
" 'projects': [{'organization': 'VIMMERSE',\n",
" 'project_name': 'AI-powered Editor Web Application',\n",
" 'location': None,\n",
" 'start_date': '2023-06-01',\n",
" 'end_date': 'present',\n",
" 'descriptions': ['Developed an AI-powered editor web application that brings photos to life by animating images in 3D.',\n",
" 'Developed robust back-end services utilizing Python (Flask/FastAPI) and Node.js (AWS Lambda) for efficient and scalable web applications.',\n",
" 'Built user-friendly and interactive websites using Next.js, ensuring a seamless user experience.',\n",
" 'Deployed and managed AWS infrastructure, including EC2 instances, S3 storage buckets, DynamoDB for NoSQL data management, and Cognito user pools for secure user authentication.',\n",
" 'Experienced Agile and Scrum methodologies within a fast-paced startup environment to ensure efficient project delivery and continuous improvement.',\n",
" 'Collaborated effectively with cross-functional teams (design, product management) to deliver projects on time, fostering a positive and collaborative work environment.']},\n",
" {'organization': 'VIKING SASQUATCH',\n",
" 'project_name': 'Real Estate Transactions API and Integrations',\n",
" 'location': None,\n",
" 'start_date': '2023-01-01',\n",
" 'end_date': '2023-06-30',\n",
" 'descriptions': ['Developed APIs and Integrations for all of the parties that work on Real Estate Transactions.',\n",
" 'Connecting Mortgage, Title, and Real Estate to solve pain points and improve automation and create efficiencies.',\n",
" 'Implemented a user-friendly front-end interface using Nuxt.js, ensuring a seamless user experience.',\n",
" 'Built backend APIs utilizing Node.js serverless functions for optimal performance.',\n",
" 'Managed data storage and security by implementing a MySQL database.',\n",
" 'Collaborated effectively within a team using Agile methodologies like sprint planning, daily standups, retrospectives to ensure project delivery and continuous improvement.']},\n",
" {'organization': 'ROX PAY SRL',\n",
" 'project_name': 'Fintech Software House',\n",
" 'location': None,\n",
" 'start_date': '2021-12-01',\n",
" 'end_date': '2022-12-31',\n",
" 'descriptions': ['Built Fintech Software House that aims to optimize B2B payments by offering a systemic solution that gives value-added services in collection of payments, financial information and corporate liquidity by essentially creating a Commission Free, Open Loop, Payment Gateway system.',\n",
" 'Developed front-end by using React.js and Redux, Javascript/Typescript.',\n",
" 'Contributed developing backend utilizing Django/Python.']},\n",
" {'organization': 'FREELANCE',\n",
" 'project_name': 'Various Web and Mobile Applications',\n",
" 'location': None,\n",
" 'start_date': '2017-09-01',\n",
" 'end_date': '2021-10-31',\n",
" 'descriptions': ['Developed and managed many web and mobile applications while working as freelancer at Internet Dzyns LLC company.',\n",
" 'Developed multiple web applications, participating in the whole process of their development: product design and estimation, code design and development, DevOps, UI/UX design, product launch and maintenance.',\n",
" 'Developed cross-platform mobile application using Flutter and Ionic/Angular.',\n",
" 'Developed NFT marketplace websites and wrote smart contracts.']},\n",
" {'organization': 'NEUSOFT',\n",
" 'project_name': 'Server Administration and Java Development',\n",
" 'location': None,\n",
" 'start_date': '2014-06-01',\n",
" 'end_date': '2017-08-31',\n",
" 'descriptions': ['Worked as intern and software developer after graduated university.',\n",
" 'Correct analytical and reasoning skills to troubleshoot and repair server issues.',\n",
" 'Operating Systems & Security Software.',\n",
" 'Java / Spring Boot / Hibernate.']}]}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(json_result)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "any",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 7f137d1

Please sign in to comment.