res / resume.py
sarahai's picture
Upload 2 files
63c27c4 verified
raw
history blame
No virus
1.67 kB
import streamlit as st
import PyPDF2
import docx
import spacy
# spaCy pipeline loader
@st.cache_resource
def load_model():
    """Load and return spaCy's ``en_core_web_sm`` pipeline.

    The function is wrapped in ``st.cache_resource``.
    """
    pipeline = spacy.load("en_core_web_sm")
    return pipeline
# Module-level spaCy pipeline used by analyze_resume() below.
nlp = load_model()
# Extract plain text from an uploaded resume
def extract_text(file):
    """Return the plain text of an uploaded PDF or DOCX resume.

    Args:
        file: An uploaded-file object exposing a ``type`` attribute that
            holds its MIME type, and readable by ``PyPDF2`` / ``docx``.

    Returns:
        The extracted text. PDF pages are concatenated with no separator;
        DOCX paragraphs are joined with newlines. An empty string is
        returned for any other MIME type so callers always get a ``str``.
    """
    if file.type == "application/pdf":
        # PdfFileReader / getPage / numPages were removed in PyPDF2 3.0;
        # PdfReader and its .pages sequence are the supported replacements.
        reader = PyPDF2.PdfReader(file)
        # Guard against pages that yield no text (e.g. image-only pages).
        return "".join(page.extract_text() or "" for page in reader.pages)
    if file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return "\n".join(para.text for para in docx.Document(file).paragraphs)
    # Unsupported MIME type: return empty text instead of an implicit None.
    return ""
# Extract entities from the resume and score it against the job description
def analyze_resume(text, job_desc):
    """Extract named entities from a resume and score it against a job description.

    Args:
        text: Resume text. ``None`` is treated as an empty string.
        job_desc: Job description text. ``None`` is treated as empty.

    Returns:
        A ``(extracted_info, match_score)`` tuple. ``extracted_info`` is a
        dict with list values under the keys ``"skills"``, ``"education"``
        and ``"experience"``. ``match_score`` is the percentage (0-100) of
        job-description word tokens that also occur in the resume, compared
        case-insensitively; it is ``0.0`` when the job description contains
        no word tokens.
    """
    doc = nlp(text or "")
    extracted_info = {"skills": [], "education": [], "experience": []}
    for ent in doc.ents:
        # ORG and GPE entities are collected under "education",
        # DATE entities under "experience".
        if ent.label_ in ("ORG", "GPE"):
            extracted_info["education"].append(ent.text)
        elif ent.label_ == "DATE":
            extracted_info["experience"].append(ent.text)
    # NOTE: "skills" stays empty; no entity label is mapped to it here.

    # Compare word tokens on both sides so the numerator and denominator use
    # the same tokenisation (keeps the score within 0-100), and match whole
    # lower-cased words rather than raw substrings of the resume text.
    resume_words = {
        tok.lower_ for tok in doc if not (tok.is_punct or tok.is_space)
    }
    job_words = [
        tok.lower_
        for tok in nlp(job_desc or "")
        if not (tok.is_punct or tok.is_space)
    ]
    if not job_words:
        # Whitespace-only job description: avoid dividing by zero.
        return extracted_info, 0.0
    matched = sum(1 for word in job_words if word in resume_words)
    match_score = matched / len(job_words) * 100
    return extracted_info, match_score
# ---- Streamlit page: inputs first, results once both are provided ----
st.title("Resume Scanner with NLP")
uploaded_file = st.file_uploader(
    "Upload a resume (PDF or DOCX)",
    type=["pdf", "docx"],
)
job_description = st.text_area("Paste the job description here")

# Results are rendered only when a file is uploaded and a description is entered.
if uploaded_file and job_description:
    resume_text = extract_text(uploaded_file)
    extracted_info, match_score = analyze_resume(
        resume_text,
        job_description,
    )

    # Entities pulled from the resume.
    st.subheader("Extracted Information:")
    st.write(extracted_info)

    # Match percentage, shown with two decimals.
    st.subheader(f"Match Score: {match_score:.2f}%")