import os
import re
import json

import google.generativeai as genai
from pdfminer.high_level import extract_text
from langchain_openai import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.adapters import openai
from langchain_community.document_loaders import PyMuPDFLoader

# Read the API keys from environment variables; never hardcode secrets in source.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]  # your OpenAI API key
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]  # your Google Generative AI key

openai.api_key = OPENAI_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

PROMPT_QUESTIONS_JD = """Extract the following information from the job description
{"job_title":, "location":, "experience":, "skills": {"Technical skills": [], "Soft skills": []}, "CTC":, "qualifications":, "competencies and behaviour":, "roles & responsibilities":, "job_summary":}
"""

PROMPT_QUESTIONS_RESUME = """Summarize the text below into a JSON with exactly the following structure
{"basic_info": {"first_name", "last_name", "full_name", "email", "phone_number", "location", "portfolio_website_url", "linkedin_url", "github_main_page_url", "university", "education_level (BS, MS, or PhD)", "graduation_month", "majors", "GPA"}, "work_experience": [{"job_title", "company", "location", "duration", "job_summary"}], "skills": {"Technical skills": [], "Soft skills": []}, "Total Work Experience":}
"""

SUMMARIZE_JD_PROMPT = """Please summarize the given document, mentioning total experience, skillsets and qualifications.
Don't include the "About Us" section, which contains company details. Only mention the data required in the JD. Summarize within 100 words."""

SUMMARIZE_RESUME_PROMPT = """Please summarize the given resume, mentioning the name of the candidate, total working experience, skillsets and qualifications.
In particular, summarize the projects section very briefly and precisely, in bullet points.
If the candidate's experience is not mentioned, try to calculate it from the dates of the projects."""


def pdf2string(pdf_path: str) -> str:
    """Extract raw text from a PDF and normalise whitespace and URLs."""
    with open(pdf_path, "rb") as file:
        text = extract_text(file)
    text = re.sub(r"\s[,.]", ",", text)
    text = re.sub(r"[\n]+", "\n", text)
    text = re.sub(r"[\s]+", " ", text)
    text = re.sub(r"http[s]?(://)?", "", text)
    return text


def parse_jd(pdf_path: str) -> str:
    """Extract structured JD fields as a JSON string using the chat model."""
    pdf_str = pdf2string(pdf_path)
    engine = "gpt-3.5-turbo-1106"
    response = openai.ChatCompletion.create(
        model=engine,
        messages=[
            {"role": "system", "content": PROMPT_QUESTIONS_JD},
            {"role": "user", "content": pdf_str},
        ],
    )
    return response["choices"][0]["message"]["content"].strip()


def remove_backticks_and_json(text):
    """Strip the Markdown ```json fence the model sometimes wraps around its output."""
    if text.startswith("```"):
        text = text[len("```"):]
    if text.startswith("json"):
        text = text[len("json"):]
    if text.endswith("```"):
        text = text[:-len("```")]
    return text.strip()


def process_jd(file_path: str):
    try:
        parsed_jd = parse_jd(file_path)
        print(parsed_jd)
        print("Successfully processed JD")
        return parsed_jd
    except Exception as e:
        print(f"Error processing the JD: {str(e)}")


def parse_resume(pdf_path: str) -> str:
    """Extract structured resume fields as a JSON string using the chat model."""
    pdf_str = pdf2string(pdf_path)
    engine = "gpt-3.5-turbo-1106"
    response = openai.ChatCompletion.create(
        model=engine,
        messages=[
            {"role": "system", "content": PROMPT_QUESTIONS_RESUME},
            {"role": "user", "content": pdf_str},
        ],
    )
    return response["choices"][0]["message"]["content"].strip()


def process_resume(file_path: str):
    try:
        parsed_resume = parse_resume(file_path)
        print("Successfully processed resume")
        return parsed_resume
    except Exception as e:
        print(f"Error processing the resume: {str(e)}")


def document_load(file_path: str) -> str:
    """Load a PDF page by page and return its content as a JSON string."""
    loader = PyMuPDFLoader(file_path)
    pages = loader.load()
    resume_dict = {}
    # Store each page's content under a "Page_<n>" key.
    for i, document in enumerate(pages, start=1):
        resume_dict[f"Page_{i}"] = document.page_content
    return json.dumps(resume_dict, indent=2)


def summarize(pdf_content, prompt):
    """Summarize the given document text with the Gemini model."""
    model = genai.GenerativeModel("gemini-pro")
    response = model.generate_content([pdf_content, prompt])
    return response.text


def summarize_resume(file_path: str):
    try:
        parsed_resume = document_load(file_path)
        summarized_resume = summarize(parsed_resume, SUMMARIZE_RESUME_PROMPT)
        print("Successfully processed resume summary")
        return summarized_resume
    except Exception as e:
        print(f"Error processing the resume summary: {str(e)}")


def summarize_jd(file_path: str):
    try:
        parsed_jd = document_load(file_path)
        summarized_jd = summarize(parsed_jd, SUMMARIZE_JD_PROMPT)
        print("Successfully processed JD summary")
        return summarized_jd
    except Exception as e:
        print(f"Error processing the JD summary: {str(e)}")


def aggregate_score(json_data):
    """Combine the per-category match percentages into a weighted aggregate
    (experience 30%, technical skills 60%, soft skills 10%)."""
    parsed_data = json.loads(json_data)
    experience_match_percentage = float(
        parsed_data["Experience"]["Match Percentage"].rstrip("%")
    )
    technical_skills_match_percentage = float(
        parsed_data["Technical Skills"]["Match Percentage"].rstrip("%")
    )
    soft_skills_match_percentage = float(
        parsed_data["Soft Skills"]["Match Percentage"].rstrip("%")
    )
    aggregate_match_percentage = (
        (experience_match_percentage * 0.3)
        + (technical_skills_match_percentage * 0.6)
        + (soft_skills_match_percentage * 0.1)
    )
    return {
        "Experience Match Percentage": experience_match_percentage,
        "Technical Skills Match Percentage": technical_skills_match_percentage,
        "Soft Skills Match Percentage": soft_skills_match_percentage,
        "Aggregate Match Percentage": aggregate_match_percentage,
    }


def match_reason(jd_summary, summarized_resume, result, stack):
    """Ask the model to explain the aggregate percentage and suggest a suitable profile."""
    template = """You are a skilled ATS (Applicant Tracking System) scanner with a deep understanding of {stack}; your task is to evaluate the resume against the provided job description.
Match the provided JD summary, resume summary, and the match percentages of Experience, Technical Skills and Soft Skills of the candidate with respect to the JD.
Find the aggregate percentage and the reason behind it based upon the data provided.
**Important:**
- If a candidate has more experience than required, consider it a full match for experience.

Job Description Summary: {jd_summary}

Summarized Resume: {summarized_resume}

Result: {result}

Based upon the given data, give a reason for the aggregate percentage only. Also mention what sort of profile the candidate is more suitable for."""
    llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0)
    prompt_input = {
        "stack": stack,
        "jd_summary": jd_summary,
        "summarized_resume": summarized_resume,
        "result": result,
    }
    prompt = PromptTemplate(
        input_variables=["stack", "jd_summary", "summarized_resume", "result"],
        template=template,
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    output = chain.invoke(prompt_input)
    return output["text"]


def match_profile(jd_json, res_json):
    """Score the resume against the JD and return a JSON string with per-category match percentages."""
    template = """You are a skilled ATS (Applicant Tracking System) scanner with a deep understanding of {stack}; your task is to evaluate the resume against the provided job description.
Match the required Experience, Technical Skills and Soft Skills of the JD with the candidate's Experience, Technical Skills and Soft Skills.
Give the match percentage of Experience, Technical Skills and Soft Skills of the candidate resume against the JD. Also consider the candidate's experience and the experience required.
**Important:**
- If a candidate has more experience than required, consider it a full match for experience.
- Only consider skills that are explicitly mentioned in the job description for skills matching. Exclude any candidate skills that are not listed in the JD.
- If the candidate's experience is less than the required experience, decrease the match percentage of technical skills by 10 and soft skills by 5, since lower experience implies lower technical and soft skill levels.

Job Description: {job_description} of experience in {stack}

Experience Required: {experience_required} of experience in {stack}

Required Technical Skills: {required_tech_skills}

Required Soft Skills: {required_soft_skills}

Below is the information about the candidate's resume.
Candidate Name: {candidate_name}

Candidate Relevant Experience: {candidate_experience}

Technical Skills: {candidate_tech_skills}

Soft Skills: {candidate_soft_skills}

Based on the given input, give a reason for the provided match percentages.
Give the output in JSON format with Candidate Name, Experience, Technical Skills (with the number of technical skills matched) and Soft Skills; do not mention the aggregate percentage in the reason:
{{
    "Candidate Name": "",
    "Experience": {{
        "Candidate Experience": "",
        "Match Percentage": ""
    }},
    "Technical Skills": {{
        "Number of Technical Skills Matched / JD Technical Skills": "",
        "Match Percentage": ""
    }},
    "Soft Skills": {{
        "Number of Soft Skills Matched / JD Soft Skills": "",
        "Match Percentage": ""
    }},
    "Reason": ""
}}
"""
    llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0)
    prompt_input = {
        "stack": jd_json["job_title"],
        "job_description": jd_json["job_summary"],
        "experience_required": jd_json["experience"],
        "required_tech_skills": jd_json["skills"]["Technical skills"],
        "required_soft_skills": jd_json["skills"]["Soft skills"],
        "candidate_name": res_json["basic_info"]["full_name"],
        "candidate_experience": res_json["Total Work Experience"],
        "candidate_tech_skills": res_json["skills"]["Technical skills"],
        "candidate_soft_skills": res_json["skills"]["Soft skills"],
    }
    prompt = PromptTemplate(
        input_variables=[
            "stack",
            "job_description",
            "experience_required",
            "required_tech_skills",
            "required_soft_skills",
            "candidate_name",
            "candidate_experience",
            "candidate_tech_skills",
            "candidate_soft_skills",
        ],
        template=template,
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    output = chain.invoke(prompt_input)
    return output["text"]


def main(jd_file_path: str, resume_file_path: str):
    try:
        # Parse both documents into JSON strings via the chat model.
        job_description_json = process_jd(jd_file_path)
        resume_json = process_resume(resume_file_path)
        job_description_json_cleaned = remove_backticks_and_json(job_description_json)
        resume_json_cleaned = remove_backticks_and_json(resume_json)
        print("Job Description JSON:", job_description_json)
        print("Resume JSON:", resume_json)
        data1 = json.loads(job_description_json_cleaned)
        data2 = json.loads(resume_json_cleaned)

        # Short natural-language summaries used when generating the reasoning.
        job_description_summary = summarize_jd(jd_file_path)
        resume_summary = summarize_resume(resume_file_path)

        # Sanity-check that the parsed resume actually contains technical skills.
        if "skills" in data2 and isinstance(data2["skills"], dict):
            if "Technical skills" in data2["skills"]:
                print(data2["skills"]["Technical skills"])
            else:
                print("Error: 'Technical skills' key not found in the 'skills' section.")
        else:
            print("Error: 'skills' key not found or empty in the JSON.")

        result = match_profile(data1, data2)
        aggregate = aggregate_score(result)
        reason_result = match_reason(
            job_description_summary, resume_summary, aggregate, data1["job_title"]
        )
        print(result)
        print("\n")
        print(aggregate)
        print(reason_result)
        print("--------------------------------\n\n--------------------------------")
        # jsonify the output as needed and return
    except json.decoder.JSONDecodeError as e:
        print(f"Error decoding JSON in file: {e}")
    except FileNotFoundError:
        print("File not found")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


if __name__ == "__main__":
    jd_path = input("Enter file path of JD: ")
    resume_path = input("Enter file path of Resume: ")
    main(jd_path, resume_path)
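
# Illustrative only: a rough sketch of what aggregate_score() does with the
# match_profile() output, using made-up percentages and a hypothetical candidate
# (the real values come from the model). With the 30/60/10 weighting used above:
#
#   example_result = json.dumps({
#       "Candidate Name": "Jane Doe",
#       "Experience": {"Candidate Experience": "4 years", "Match Percentage": "80%"},
#       "Technical Skills": {"Number of Technical Skills Matched / JD Technical Skills": "6/8",
#                            "Match Percentage": "75%"},
#       "Soft Skills": {"Number of Soft Skills Matched / JD Soft Skills": "3/4",
#                       "Match Percentage": "70%"},
#       "Reason": "...",
#   })
#   aggregate_score(example_result)
#   # -> {"Experience Match Percentage": 80.0,
#   #     "Technical Skills Match Percentage": 75.0,
#   #     "Soft Skills Match Percentage": 70.0,
#   #     "Aggregate Match Percentage": 80*0.3 + 75*0.6 + 70*0.1 = 76.0}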