import os
import re
from datetime import datetime

import PyPDF2
import streamlit as st
import torch
from docxtpl import DocxTemplate
from groq import Groq
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# Read the Groq API key from the environment. Never hard-code secrets in
# source; set the key before launching, e.g. `export GROQ_API_KEY=...`.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


# --- Resume Extraction Functions ---
def extract_text_from_pdf(pdf_file):
    """Extracts text from a PDF path or file-like object."""
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # extract_text() can return None for image-only pages; substitute "".
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def extract_text_from_txt(txt_file):
    """Extracts text from an uploaded .txt file object."""
    return txt_file.read().decode("utf-8", errors="ignore")


# --- Skill Extraction with Llama Model ---
def extract_skills_llama(text):
    """Extracts skills from the text using the Llama model via the Groq API."""
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": (
                    "Extract skills from the following text and return them "
                    f"as a comma-separated list: {text}"
                ),
            }
        ],
        model="llama3-70b-8192",
    )
    # The prompt requests a comma-separated list, so split on ", ".
    return chat_completion.choices[0].message.content.split(", ")


# --- Job Description Processing ---
def process_job_description(job_description_text):
    """Preprocesses the job description and extracts its skills."""
    job_description_text = preprocess_text(job_description_text)
    return extract_skills_llama(job_description_text)


# --- Text Preprocessing ---
def preprocess_text(text):
    """Preprocesses text for more reliable analysis."""
    text = text.lower()                  # convert to lowercase
    text = re.sub(r"[^\w\s]", "", text)  # remove punctuation
    text = re.sub(r"\s+", " ", text)     # collapse extra whitespace
    return text


# --- Resume Similarity ---
def calculate_resume_similarity(resume_text, job_description_text):
    """Scores resume/job-description similarity with a Hugging Face cross-encoder."""
    model_name = "cross-encoder/stsb-roberta-base"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    inputs = tokenizer(
        resume_text,
        job_description_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        outputs = model(**inputs)
    return torch.sigmoid(outputs.logits).item()


# --- Communication Generation ---
def communication_generator(message, max_length=100):
    """Generates a communication response with a Hugging Face seq2seq model."""
    model_name = "google/flan-t5-base"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    inputs = tokenizer(message, return_tensors="pt", padding=True, truncation=True, max_length=512)
    response = model.generate(**inputs, max_length=max_length, num_beams=4, early_stopping=True)
    generated_response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
    return generated_response + " We look forward to getting in touch with you soon!"
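
# An optional optimization sketch (not in the original code): the helpers
# above reload their Hugging Face models on every call. Wrapping a loader in
# Streamlit's `st.cache_resource` keeps one copy per process. The name
# `load_sequence_classifier` is illustrative, not an existing helper.
@st.cache_resource
def load_sequence_classifier(model_name):
    """Loads and caches a tokenizer/model pair for sequence classification."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model
# Usage: tokenizer, model = load_sequence_classifier("cross-encoder/stsb-roberta-base")
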
# --- Sentiment Analysis ---
def sentiment_model(text):
    """Analyzes the sentiment of the text using a Hugging Face model."""
    # Three-class sentiment model (the model id in the original draft did not
    # exist on the Hub; this one matches the 0/1/2 label mapping below).
    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_class = torch.argmax(outputs.logits).item()
    sentiment_labels = {0: "Negative", 1: "Neutral", 2: "Positive"}
    return sentiment_labels[predicted_class]


# --- Placeholder Functions for Enhancement ---
def enhance_resume(resume_text):
    """Placeholder for resume enhancement (implement your own logic here)."""
    return resume_text


def enhance_job_description(job_description_text):
    """Placeholder for job description enhancement (implement your own logic here)."""
    return job_description_text


# --- Resume Analysis Function ---
def analyze_resume(resume_file, job_description_file):
    """Analyzes the resume against the job description."""
    # Streamlit's UploadedFile is file-like; pass it to the extractors
    # directly instead of reopening it by name.
    if resume_file.name.endswith(".pdf"):
        resume_text = extract_text_from_pdf(resume_file)
    elif resume_file.name.endswith(".txt"):
        resume_text = extract_text_from_txt(resume_file)
    else:
        return ("Invalid file type. Please upload a PDF or TXT file for the resume.",)

    if job_description_file.name.endswith(".txt"):
        job_description_text = extract_text_from_txt(job_description_file)
    else:
        return ("Invalid file type. Please upload a TXT file for the job description.",)

    job_description_skills = process_job_description(job_description_text)
    resume_skills = extract_skills_llama(resume_text)
    similarity_score = calculate_resume_similarity(resume_text, job_description_text)
    communication_response = communication_generator(
        f"I am reviewing a resume for a {job_description_text} position. "
        f"The candidate has the following skills: {', '.join(resume_skills)}"
    )
    sentiment = sentiment_model(resume_text)
    enhanced_resume = enhance_resume(resume_text)
    enhanced_job_description = enhance_job_description(job_description_text)

    # Returned as a tuple of markdown strings so the UI can render each line.
    return (
        "## Resume and Job Description Analysis",
        f"**Similarity Score:** {similarity_score:.2f}",
        f"**Communication Response:** {communication_response}",
        f"**Sentiment:** {sentiment}",
        f"**Resume Skills:** {', '.join(resume_skills)}",
        f"**Job Description Skills:** {', '.join(job_description_skills)}",
        f"**Enhanced Resume:**\n{enhanced_resume}",
        f"**Enhanced Job Description:**\n{enhanced_job_description}",
    )
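
# A hedged sketch of what the `enhance_resume` placeholder above could do,
# reusing the Groq client already initialized in this script; the prompt
# wording and the function name are assumptions, not part of the original.
def enhance_resume_with_llama(resume_text):
    """Example enhancement: asks the Llama model to tighten the resume wording."""
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": (
                    "Rewrite the following resume to be concise and "
                    f"achievement-oriented, keeping every fact unchanged: {resume_text}"
                ),
            }
        ],
        model="llama3-70b-8192",
    )
    return chat_completion.choices[0].message.content
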
# --- Offer Letter Generation ---
def generate_offer_letter(template_file, candidate_name, role, start_date, hours):
    """Generates an offer letter from a DOCX template; returns it as bytes."""
    # Parse the start date string and reformat it for the letter.
    try:
        start_date = datetime.strptime(start_date, "%Y-%m-%d").strftime("%B %d, %Y")
    except ValueError:
        st.error("Invalid date format. Please use YYYY-MM-DD.")
        return None

    # Define the context variables for the template.
    context = {
        "candidate_name": candidate_name,
        "role": role,
        "start_date": start_date,
        "hours": hours,
    }

    # Load the template and render it with the context variables.
    # DocxTemplate accepts a file-like object, so the upload is passed directly.
    tpl = DocxTemplate(template_file)
    tpl.render(context)

    # Save the generated document next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    docx_file_path = os.path.join(script_dir, f"{candidate_name}_offer_letter.docx")
    tpl.save(docx_file_path)

    # Return the rendered document as bytes for the download button.
    with open(docx_file_path, "rb") as docx_file:
        return docx_file.read()


# --- Streamlit Interface ---
st.set_page_config(
    page_title="HR Assistant",
    page_icon=":robot:",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.title("HR Assistant")

tab1, tab2 = st.tabs(["Resume Analyzer", "Offer Letter Generator"])

with tab1:
    st.header("Resume and Job Description Analyzer")
    resume_file = st.file_uploader("Upload Resume (PDF or TXT)", type=["pdf", "txt"])
    job_description_file = st.file_uploader("Upload Job Description (TXT)", type=["txt"])

    if resume_file is not None and job_description_file is not None:
        analysis_results = analyze_resume(resume_file, job_description_file)
        for result in analysis_results:
            st.markdown(result)

with tab2:
    st.header("Offer Letter Generator")
    template_file = st.file_uploader("Upload Offer Letter Template (DOCX)", type=["docx"])
    candidate_name = st.text_input("Candidate Name")
    role = st.text_input("Role")
    start_date = st.text_input("Start Date (YYYY-MM-DD)")
    # number_input defaults to 0, which would fail the truthiness check below.
    hours = st.number_input("Hours per Week", min_value=1, value=40)

    if template_file is not None and candidate_name and role and start_date and hours:
        offer_letter = generate_offer_letter(template_file, candidate_name, role, start_date, hours)
        if offer_letter is not None:
            st.download_button(
                "Download Offer Letter",
                offer_letter,
                file_name=f"{candidate_name}_offer_letter.docx",
            )
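
# To launch the app locally (the filename below is an assumption; substitute
# this file's actual name):
#   streamlit run hr_assistant.py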