import streamlit as st
import PyPDF2
import docx
import spacy


@st.cache_resource
def load_model():
    # Cache the spaCy English model so it is loaded only once per session.
    return spacy.load("en_core_web_sm")


nlp = load_model()


def extract_text(file):
    # Pull raw text out of the upload based on its MIME type.
    if file.type == "application/pdf":
        reader = PyPDF2.PdfReader(file)
        return "".join([page.extract_text() for page in reader.pages])
    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return "\n".join([para.text for para in docx.Document(file).paragraphs])


def analyze_resume(text, job_desc):
    doc = nlp(text)
    extracted_info = {"skills": [], "education": [], "experience": []}

    # Use spaCy's named entities as rough signals: organizations and places
    # tend to come from the education/employment sections, dates from
    # experience ranges.
    for ent in doc.ents:
        if ent.label_ in ["ORG", "GPE"]:
            extracted_info["education"].append(ent.text)
        elif ent.label_ == "DATE":
            extracted_info["experience"].append(ent.text)

    # Naive keyword overlap: the share of job-description tokens that also
    # appear somewhere in the resume text.
    job_tokens = nlp(job_desc)
    match_score = sum(1 for token in job_tokens if token.text in text) / len(job_tokens) * 100

    return extracted_info, match_score


st.title("Resume Scanner with NLP")

uploaded_file = st.file_uploader("Upload a resume (PDF or DOCX)", type=["pdf", "docx"])
job_description = st.text_area("Paste the job description here")

if uploaded_file and job_description:
    resume_text = extract_text(uploaded_file)
    extracted_info, match_score = analyze_resume(resume_text, job_description)

    st.subheader("Extracted Information:")
    st.write(extracted_info)
    st.subheader(f"Match Score: {match_score:.2f}%")
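
# A minimal way to try this locally, assuming the script is saved under the
# hypothetical name resume_scanner.py and the model has been installed with
# `python -m spacy download en_core_web_sm`:
#
#     streamlit run resume_scanner.py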