# Untitled

import os
import re
import json
from langchain_openai import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import tiktoken
from pdfminer.high_level import extract_text
from langchain_community.adapters import openai
import google.generativeai as genai
from langchain_community.document_loaders import PyMuPDFLoader

# API credentials are read from the environment so that no secret is stored
# in the source file.  Export OPENAI_API_KEY and GOOGLE_API_KEY before running.
# NOTE(review): the keys that used to be hard-coded here were exposed in
# source and should be revoked/rotated.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
openai.api_key = OPENAI_API_KEY

# Passing None lets the Gemini client fall back to its own environment lookup.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# System prompt used by parse_jd: asks the chat model to extract the listed
# fields from a job description.
PROMPT_QUESTIONS_JD = """Extract the following information from the job description {"job_title":, "location":, "experience":, "skills":[{Technical skills[], Soft skills[]}], "CTC":, "qualifications":, "competencies and behaviour":,"roles & responsibilities":, "job_summary":}
"""

# System prompt used by parse_resume: asks the chat model to summarize a
# resume into JSON with the structure spelled out below.
PROMPT_QUESTIONS_RESUME = """Summarize the text below into a JSON with exactly the following structure {"basic_info": {"first_name", "last_name", "full_name", "email", "phone_number", "location", "portfolio_website_url", "linkedin_url", "github_main_page_url", "university", "education_level (BS, MS, or PhD)", "graduation_month", "majors", "GPA"}, "work_experience": [{job_title, company, location, duration, job_summary}], "skills":{Technical skills:[], Soft skills:[]}, Total Work Experience:}
"""

# Instruction passed to summarize() by summarize_jd.
SUMMARIZE_JD_PROMPT = """
Please summarize the given document mentioning total experience ,skillsets and qualification, don't include "About Us" Section which contains company details. Only mention the required data in JD, summarize within 100 words"""

# Instruction passed to summarize() by summarize_resume.
SUMMARIZE_RESUME_PROMPT = """
Please summarize the given resume mentioning name of the candidate, total working experience ,skillsets and qualification. Particulary summarize the projects section in very short and precise and in bullet points. If candidate experience is not mentioned, then try to calculate candidate's experience from date of projects ."""


def pdf2string(pdf_path: str) -> str:
    """Extract the text of a PDF file and flatten it for use in a prompt.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text with every run of whitespace (newlines included)
        collapsed to a single space and any ``http``/``https`` scheme prefix
        removed.

    Raises:
        OSError: If ``pdf_path`` cannot be opened.
    """
    with open(pdf_path, "rb") as file:
        text = extract_text(file)

    # Patterns are raw strings: "\s" in a plain literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    # NOTE(review): this replaces a whitespace character plus "," or "." with
    # a comma, so " ." becomes "," -- kept as-is; confirm that is intended.
    text = re.sub(r"\s[,.]", ",", text)
    # Collapsing all whitespace runs also covers runs of newlines, so no
    # separate newline pass is needed.
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"https?(://)?", "", text)
    return text


def parse_jd(pdf_path: str) -> str:
    """Ask the chat model to extract structured fields from a JD PDF.

    The PDF text is sent as the user message and ``PROMPT_QUESTIONS_JD`` as
    the system message.

    Args:
        pdf_path: Path of the job-description PDF.

    Returns:
        The model's reply with surrounding whitespace stripped.  The reply is
        returned as raw text; it is not parsed or validated as JSON here.
    """
    pdf_str = pdf2string(pdf_path)

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-1106",
        messages=[
            {"role": "system", "content": PROMPT_QUESTIONS_JD},
            {"role": "user", "content": pdf_str},
        ],
    )

    return response["choices"][0]["message"]["content"].strip()

def remove_backticks_and_json(text):
    """Strip a Markdown code fence (```` ``` ```` or ```` ```json ````) from text.

    Surrounding whitespace is removed before the fence markers are looked
    for, so a fenced payload followed by a trailing newline is unwrapped too.

    Args:
        text: The string to clean, e.g. a model reply wrapping JSON in a fence.

    Returns:
        The text without the opening/closing fence and without leading or
        trailing whitespace.  Text that has no fence is only stripped.
    """
    fence = "```"
    text = text.strip()

    if text.startswith(fence):
        text = text[len(fence):]
        # The language tag is only meaningful directly after an opening fence.
        if text.startswith("json"):
            text = text[len("json"):]

    if text.endswith(fence):
        text = text[:-len(fence)]

    return text.strip()

def process_jd(folder_path: str):
    """Parse a job description, echo the result and report the outcome.

    Args:
        folder_path: Path handed straight to ``parse_jd``.

    Returns:
        Whatever ``parse_jd`` returns, or ``None`` when any exception was
        raised (the error is printed rather than propagated).
    """
    try:
        jd_reply = parse_jd(folder_path)
        print(jd_reply)
        print("Successfully processed JD")
        return jd_reply
    except Exception as err:
        print(f"Error processing the JD: {err!s}")
    return None


def parse_resume(pdf_path: str) -> str:
    """Ask the chat model to summarize a resume PDF into JSON text.

    The PDF text is sent as the user message and ``PROMPT_QUESTIONS_RESUME``
    as the system message.

    Args:
        pdf_path: Path of the resume PDF.

    Returns:
        The model's reply with surrounding whitespace stripped.  The reply is
        returned as raw text; it is not parsed or validated as JSON here.
    """
    pdf_str = pdf2string(pdf_path)

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-1106",
        messages=[
            {"role": "system", "content": PROMPT_QUESTIONS_RESUME},
            {"role": "user", "content": pdf_str},
        ],
    )

    return response["choices"][0]["message"]["content"].strip()


def process_resume(file_path: str):
    """Parse a resume and report the outcome on stdout.

    Args:
        file_path: Path handed straight to ``parse_resume``.

    Returns:
        Whatever ``parse_resume`` returns, or ``None`` when any exception was
        raised (the error is printed rather than propagated).
    """
    try:
        resume_reply = parse_resume(file_path)
        print("Successfully processed resume")
        return resume_reply
    except Exception as err:
        print(f"Error processing the resume: {err!s}")
    return None


def document_load(file_path: str):
    """Load a PDF with PyMuPDFLoader and serialize its pages as JSON.

    Args:
        file_path: Path of the PDF to load.

    Returns:
        A JSON string (indent of 2) mapping ``"Page_1"``, ``"Page_2"``, ... to
        the ``page_content`` of each loaded document, in load order.
    """
    documents = PyMuPDFLoader(file_path).load()

    # Keys are 1-based so they line up with human page numbering.
    content_by_page = {
        f"Page_{number}": doc.page_content
        for number, doc in enumerate(documents, start=1)
    }

    return json.dumps(content_by_page, indent=2)


def summarize(pdf_content, prompt):
    """Send the content and the prompt to the "gemini-pro" model.

    Args:
        pdf_content: First element of the request passed to the model.
        prompt: Second element of the request passed to the model.

    Returns:
        The ``text`` attribute of the model's response.
    """
    gemini = genai.GenerativeModel("gemini-pro")
    return gemini.generate_content([pdf_content, prompt]).text


def summarize_resume(file_path: str):
    """Load a resume PDF and summarize it with ``SUMMARIZE_RESUME_PROMPT``.

    Args:
        file_path: Path of the resume PDF.

    Returns:
        The summary produced by ``summarize``, or ``None`` when any exception
        was raised (the error is printed rather than propagated).
    """
    try:
        pages_json = document_load(file_path)
        resume_summary = summarize(pages_json, SUMMARIZE_RESUME_PROMPT)
        print("Successfully processed RESUME summary")
        return resume_summary
    except Exception as err:
        print(f"Error processing the resume summary: {err!s}")
    return None


def summarize_jd(file_path: str):
    """Load a job-description PDF and summarize it with ``SUMMARIZE_JD_PROMPT``.

    Args:
        file_path: Path of the job-description PDF.

    Returns:
        The summary produced by ``summarize``, or ``None`` when any exception
        was raised (the error is printed rather than propagated).
    """
    try:
        parsed_jd = document_load(file_path)
        summarized_jd = summarize(parsed_jd, SUMMARIZE_JD_PROMPT)
        print("Successfully processed JD summary")
        return summarized_jd
    except Exception as e:
        # The message names the JD so failures here are not mistaken for
        # resume failures.
        print(f"Error processing the JD summary: {str(e)}")
        return None


def aggregate_score(json_data):
    """Compute the weighted aggregate of the three match percentages.

    Args:
        json_data: JSON text containing ``"Experience"``, ``"Technical Skills"``
            and ``"Soft Skills"`` objects, each with a ``"Match Percentage"``
            entry.  The percentage may be a number or a string with an
            optional trailing ``%`` (``"85%"``, ``"85"``, ``85``).

    Returns:
        A dict with the three individual percentages as floats plus
        ``"Aggregate Match Percentage"``, weighted 30% experience,
        60% technical skills and 10% soft skills.

    Raises:
        json.JSONDecodeError: If ``json_data`` is not valid JSON.
        KeyError: If an expected section or ``"Match Percentage"`` is missing.
        ValueError: If a percentage cannot be converted to a number.
    """
    parsed_data = json.loads(json_data)

    experience_match_percentage = _parse_percentage(
        parsed_data["Experience"]["Match Percentage"]
    )
    technical_skills_match_percentage = _parse_percentage(
        parsed_data["Technical Skills"]["Match Percentage"]
    )
    soft_skills_match_percentage = _parse_percentage(
        parsed_data["Soft Skills"]["Match Percentage"]
    )

    aggregate_match_percentage = (
        (experience_match_percentage * 0.3)
        + (technical_skills_match_percentage * 0.6)
        + (soft_skills_match_percentage * 0.1)
    )

    return {
        "Experience Match Percentage": experience_match_percentage,
        "Technical Skills Match Percentage": technical_skills_match_percentage,
        "Soft Skills Match Percentage": soft_skills_match_percentage,
        "Aggregate Match Percentage": aggregate_match_percentage,
    }


def _parse_percentage(value):
    """Convert a percentage such as ``"85%"``, ``" 85 % "``, ``"85"`` or ``85`` to float."""
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return float(value)
    text = str(value).strip()
    # Only drop the last character when it really is a percent sign; slicing
    # unconditionally would turn "85" into 8.0.
    if text.endswith("%"):
        text = text[:-1]
    return float(text)


def match_reason(jd_summary, summarized_resume, result, stack):
    """Ask the completion model to explain the aggregate match percentage.

    Args:
        jd_summary: Text substituted for ``{jd_summary}`` in the prompt.
        summarized_resume: Text substituted for ``{summarized_resume}``.
        result: Value substituted for ``{result}`` (the score breakdown).
        stack: Value substituted for ``{stack}``.

    Returns:
        The ``"text"`` entry of the chain's output.
    """
    reason_template = """You are a skilled ATS (Applicant Tracking System) scanner with a deep understanding of {stack}, 
    your task is to evaluate the resume against the provided job description. Match the provide JD summary, Resume Summary, the percentage match of Experience,Technical skills and Soft Skills of candidate with respective of JD.
    Find the aggregate percentage and reason behind the percentage based upon the data provided.

    **Important:** 
    - If a candidate has more experience than required, consider it as a full match for experience.
    
    Job Description Summary:
    {jd_summary}
    \n
    Summarized Resume:
    {summarized_resume}
    \n
    Result:
    {result}
    \n
    
    Based upon the given data, give a reason for aggregate percentage only. Also mention what sort of profile candidate is more suitable for"""

    # Values for every placeholder used in the template above.
    values = {
        "stack": stack,
        "jd_summary": jd_summary,
        "summarized_resume": summarized_resume,
        "result": result,
    }

    reason_prompt = PromptTemplate(
        input_variables=["stack", "jd_summary", "summarized_resume", "result"],
        template=reason_template,
    )
    completion_model = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0)

    reply = LLMChain(llm=completion_model, prompt=reason_prompt).invoke(values)
    return reply["text"]


def match_profile(jd_json, res_json):
    """Ask the completion model to score a resume against a job description.

    Args:
        jd_json: Parsed JD data.  Reads ``"job_title"``, ``"job_summary"``,
            ``"experience"`` and ``"skills"``.
        res_json: Parsed resume data.  Reads ``"basic_info"["full_name"]``,
            ``"Total Work Experience"`` and ``"skills"``.

        In both inputs ``"skills"`` may be a dict holding ``"Technical skills"``
        and ``"Soft skills"``, or a list of such dicts.

    Returns:
        The ``"text"`` entry of the chain's output; the prompt asks the model
        to answer in the JSON layout shown at the end of the template.

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    # NOTE(review): the "Soft Skills" section of the requested output is
    # labelled "Number of Technical Skills Matched / JD Soft Skills"; it looks
    # like a copy/paste slip but is left untouched because it is prompt text.
    template = """You are a skilled ATS (Applicant Tracking System) scanner with a deep understanding of {stack}, 
    your task is to evaluate the resume against the provided job description. Match the required Expereince, Technical Skills and Soft Skills of JD with
    candidate's Expereince, Technical Skills and Soft Skills. Give matching percentge of Experience,Technical skills and Soft Skills  candidate resume with JD.
    Also consider the candidate experience and experience required.

    **Important:** 
    - If a candidate has more experience than required, consider it as a full match for experience.
    - Only consider skills that are explicitly mentioned in the job description for skills matching. Exclude any candidate skills that are not listed in the JD.
    - If candidate experience is less than required experience, decrease the match percent of technical skills by 10 and soft skill by 5 as with low experience, lower the technical and soft skill will be

    Job Description:
    {job_description} of experience in {stack}
    \n
    Experienced Required:
    {expereinced_required} of experience in {stack}
    \n
    Required Technical Skills:
    {required_tech_skills}
    \n
    Required Soft Skills:
    {required_soft_skills}
    \n
    
    Below is the infomration about candidate resume.
    Candidate Name:
    {candidate_name}
    \n
    Candidate Relavant Expereince:
    {candidate_experience}
    \n
    Technical Skills:
    {candidate_tech_skills}
    \n
    Soft Skills:
    {candidate_soft_skills}
    \n

    Based on the given input, give reason for the provided match percentage.
    Give output in json format with Candidate Name,Experience,Technical skills with number of technical skill matched and Soft Skills, do not mention the aggregate percentage in reason:
    {{
        "Candidate Name": "",
        "Experience": {{
            "Candidate Experience": "",
            "Match Percentage": ""
        }},
        "Technical Skills": {{
            "Number of Technical Skills Matched / JD Technical Skills": "",
            "Match Percentage": ""
        }},
        "Soft Skills": {{
            "Number of Technical Skills Matched / JD Soft Skills": "",
            "Match Percentage": ""
        }},
        "Reason": ""
    }}
    """

    llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0)

    prompt_input = {
        "stack": jd_json["job_title"],
        "job_description": jd_json["job_summary"],
        "expereinced_required": jd_json["experience"],
        "required_tech_skills": _skills_for(jd_json["skills"], "Technical skills"),
        "required_soft_skills": _skills_for(jd_json["skills"], "Soft skills"),
        "candidate_name": res_json["basic_info"]["full_name"],
        "candidate_experience": res_json["Total Work Experience"],
        "candidate_tech_skills": _skills_for(res_json["skills"], "Technical skills"),
        "candidate_soft_skills": _skills_for(res_json["skills"], "Soft skills"),
    }

    prompt = PromptTemplate(
        input_variables=[
            "stack",
            "job_description",
            "expereinced_required",
            "required_tech_skills",
            "required_soft_skills",
            # These two were fused into one bogus name by a missing comma.
            "candidate_name",
            "candidate_experience",
            "candidate_tech_skills",
            "candidate_soft_skills",
        ],
        template=template,
    )

    chain = LLMChain(llm=llm, prompt=prompt)

    output = chain.invoke(prompt_input)

    return output["text"]


def _skills_for(skills, key):
    """Return the ``key`` skill group from a skills dict or a list of such dicts."""
    if isinstance(skills, list):
        collected = []
        for group in skills:
            if not isinstance(group, dict):
                continue
            value = group.get(key, [])
            # A group may hold a list of skills or a single value.
            if isinstance(value, list):
                collected.extend(value)
            else:
                collected.append(value)
        return collected
    return skills[key]


def main(jd_file_path: str, resume_file_path: str):
    """Run the JD-vs-resume matching pipeline and print every result.

    Steps: extract structured data from both PDFs, summarize both PDFs,
    score the resume against the JD, compute the weighted aggregate and ask
    for a textual reason.  Errors are reported on stdout, never raised.

    Args:
        jd_file_path: Path of the job-description PDF.
        resume_file_path: Path of the resume PDF.
    """
    try:
        job_description_json = process_jd(jd_file_path)
        resume_json = process_resume(resume_file_path)

        # process_jd / process_resume print their own error and return None.
        if job_description_json is None or resume_json is None:
            print("Aborting: the JD or the resume could not be parsed.")
            return

        print("Job Description JSON:", job_description_json)
        print("Resume JSON:", resume_json)

        # Both replies may be wrapped in a Markdown code fence.
        data1 = json.loads(remove_backticks_and_json(job_description_json))
        data2 = json.loads(remove_backticks_and_json(resume_json))

        job_description_summary = summarize_jd(jd_file_path)
        resume_summary = summarize_resume(resume_file_path)

        # Diagnostic only: show the candidate's technical skills.  The check
        # runs on the parsed data (not the raw reply text) and accepts
        # "skills" as either a dict or a list of dicts.
        skills = data2.get("skills") if isinstance(data2, dict) else None
        if isinstance(skills, list) and skills:
            skills = skills[0]
        if isinstance(skills, dict) and "Technical skills" in skills:
            print(skills["Technical skills"])
        elif skills:
            print("Error: 'Technical skills' key not found in the 'Skills' list.")
        else:
            print("Error: 'Skills' key not found or empty in the JSON.")

        result = match_profile(data1, data2)
        # The scoring reply is model output too, so unwrap any code fence.
        aggregate = aggregate_score(remove_backticks_and_json(result))
        reason_resut = match_reason(
            job_description_summary, resume_summary, aggregate, data1["job_title"]
        )

        print(result)
        print("\n")
        print(aggregate)
        print(reason_resut)
        print("--------------------------------\n\n--------------------------------")

        # TODO: jsonify the output as needed and return it.

    except json.decoder.JSONDecodeError as e:
        print(f"Error decoding JSON in file : {e}")
    except FileNotFoundError:
        print("File not found")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Script entry point: ask for both PDF paths, then run the pipeline.
if __name__ == "__main__":
    jd_path = input("Enter file path of JD: ")
    resume_path = input("Enter file path of Resume: ")
    main(jd_path, resume_path)
# Editor is loading...