import streamlit as st
import PyPDF2
import docx
import spacy

# Load the spaCy NLP model once and cache it across Streamlit reruns
@st.cache_resource
def load_model():
    return spacy.load("en_core_web_sm")

nlp = load_model()

# Extract raw text from an uploaded PDF or DOCX file
def extract_text(file):
    if file.type == "application/pdf":
        reader = PyPDF2.PdfReader(file)
        return "".join(page.extract_text() or "" for page in reader.pages)
    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return "\n".join(para.text for para in docx.Document(file).paragraphs)
    return ""

# Extract named entities and compute a simple keyword-overlap match score
def analyze_resume(text, job_desc):
    doc = nlp(text)
    extracted_info = {"skills": [], "education": [], "experience": []}
    for ent in doc.ents:
        # Heuristic: organizations/places are treated as education-related,
        # dates as experience-related
        if ent.label_ in ["ORG", "GPE"]:
            extracted_info["education"].append(ent.text)
        elif ent.label_ == "DATE":
            extracted_info["experience"].append(ent.text)
    # Score: share of alphabetic job-description tokens that appear in the resume
    job_tokens = [token.text.lower() for token in nlp(job_desc) if token.is_alpha]
    resume_lower = text.lower()
    matches = sum(1 for token in job_tokens if token in resume_lower)
    match_score = (matches / len(job_tokens) * 100) if job_tokens else 0.0
    return extracted_info, match_score

# Streamlit interface
st.title("Resume Scanner with NLP")
uploaded_file = st.file_uploader("Upload a resume (PDF or DOCX)", type=["pdf", "docx"])
job_description = st.text_area("Paste the job description here")

if uploaded_file and job_description:
    resume_text = extract_text(uploaded_file)
    extracted_info, match_score = analyze_resume(resume_text, job_description)
    st.subheader("Extracted Information:")
    st.write(extracted_info)
    st.subheader(f"Match Score: {match_score:.2f}%")
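
# The "skills" bucket above is declared but never populated. A minimal sketch of
# one way to fill it with spaCy's PhraseMatcher follows; SKILL_TERMS and
# extract_skills are hypothetical names, and the term list is only illustrative.
from spacy.matcher import PhraseMatcher

SKILL_TERMS = ["python", "sql", "machine learning", "project management"]

def extract_skills(doc):
    # Case-insensitive phrase matching over the resume text
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
    matcher.add("SKILLS", [nlp.make_doc(term) for term in SKILL_TERMS])
    return sorted({doc[start:end].text for _, start, end in matcher(doc)})

# Running the app locally, assuming this file is saved as app.py
# (the filename is an assumption, not from the original):
#   pip install streamlit PyPDF2 python-docx spacy
#   python -m spacy download en_core_web_sm
#   streamlit run app.py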