import streamlit as st
import PyPDF2
import docx
import spacy


@st.cache_resource
def load_model():
    # Cache the spaCy English model so it is loaded only once per session.
    return spacy.load("en_core_web_sm")


nlp = load_model()


def extract_text(file):
    # Pull raw text out of the upload based on its MIME type.
    if file.type == "application/pdf":
        reader = PyPDF2.PdfReader(file)
        return "".join([page.extract_text() for page in reader.pages])
    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return "\n".join([para.text for para in docx.Document(file).paragraphs])


def analyze_resume(text, job_desc):
    doc = nlp(text)
    extracted_info = {"skills": [], "education": [], "experience": []}

    # Use spaCy's named entities as rough signals: organizations and places
    # tend to come from the education/employment sections, dates from
    # experience ranges.
    for ent in doc.ents:
        if ent.label_ in ["ORG", "GPE"]:
            extracted_info["education"].append(ent.text)
        elif ent.label_ == "DATE":
            extracted_info["experience"].append(ent.text)

    # Naive keyword overlap: the share of job-description tokens that also
    # appear somewhere in the resume text.
    job_tokens = nlp(job_desc)
    match_score = sum(1 for token in job_tokens if token.text in text) / len(job_tokens) * 100

    return extracted_info, match_score


st.title("Resume Scanner with NLP")

uploaded_file = st.file_uploader("Upload a resume (PDF or DOCX)", type=["pdf", "docx"])
job_description = st.text_area("Paste the job description here")

if uploaded_file and job_description:
    resume_text = extract_text(uploaded_file)
    extracted_info, match_score = analyze_resume(resume_text, job_description)

    st.subheader("Extracted Information:")
    st.write(extracted_info)
    st.subheader(f"Match Score: {match_score:.2f}%")
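
# A minimal way to try this locally, assuming the script is saved under the
# hypothetical name resume_scanner.py and the model has been installed with
# `python -m spacy download en_core_web_sm`:
#
#     streamlit run resume_scanner.py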